##### R CODE TO REPLICATE THE STATISTICAL ANALYSES AND FIGURES OF
##### Adrian Lucardi, Agustin Vallejo and German Feierherd, "Three Is a Crowd: Information and Electoral Coordination in Argentina," forthcoming at the British Journal of Political Science
##### Last date: 2025-11-07


## emptying the workspace
# removes every object that ls() lists in the environment the script is run from,
# so that the objects used below are the ones created by this script
rm (list=ls ())


## loading & installing packages
if (!require ("pacman")) install.packages ("pacman")
pacman::p_load (cowplot
                , dplyr
                , fixest
                , ggcorrplot
                , ggplot2
                , ggpmisc
                , rdlocrand
                , rdpower
                , rdrobust
                , readxl
                , tidyr
                , tidytext ## to reorder the factors within years
                , tidyverse
                , wesanderson
                , writexl
                , xtable)


## display options
# numeric printing: 4 significant digits, no scientific notation, no
# significance stars, and at most 2000 printed entries
options(
  digits = 4,
  scipen = 999, ## disable sci notation
  show.signif.stars = FALSE,
  max.print = 2000
)
# tibble printing: never truncate columns; print every row of tibbles that fit
# under the (infinite) maximum
options(
  tibble.width = Inf,
  tibble.print_max = Inf,
  tibble.print_min = 1
)


## setting the working directory --> replace this with path to your own working directory
# the data files read below are referenced by bare file name, so they are
# resolved relative to this directory
home <- "~/Dropbox/Current Projects/Primaries PBA/replication/"
setwd (home)


## own functions

# NA-tolerant maximum: the largest non-missing value of x, or NA when x holds
# no non-missing value at all (this includes a zero-length x)
max_na <- function (x) {
  if (!all (is.na (x))) {
    return (max (x, na.rm=TRUE))
  }
  NA
}
# NA-tolerant minimum: the smallest non-missing value of x, or NA when x holds
# no non-missing value at all (this includes a zero-length x)
min_na <- function (x) {
  if (!all (is.na (x))) {
    return (min (x, na.rm=TRUE))
  }
  NA
}

# extracting values of interest from a RD model
# x: a fitted model exposing $coef, $ci, $N_h, $bws and $pv (presumably the
#    object returned by rdrobust() -- confirm against the callers)
# returns an unnamed character vector of length 5:
#   (1) first coefficient, 2 decimals
#   (2) "[low:high]" built from the third row of $ci, 2 decimals
#   (3) "N_left | N_right" from the two entries of $N_h
#   (4) top-left entry of $bws, 1 decimal
#   (5) third entry of $pv, 2 decimals
extract_rd <- function (x) {
  estimate <- as.character (round (x$coef[1], 2))

  ci_low <- round (x$ci[3, 1], 2)
  ci_high <- round (x$ci[3, 2], 2)
  interval <- paste0 ("[", ci_low, ":", ci_high, "]")

  n_eff <- paste0 (x$N_h[1], " | ", x$N_h[2])
  bandwidth <- as.character (round (x$bws[1, 1], 1))
  p_value <- as.character (round (x$pv[3], 2))

  c (estimate, interval, n_eff, bandwidth, p_value)
}


## setting the graphical parameters
# global ggplot2 theme plus the colour / size / transparency constants that the
# plotting code is expected to reference by name (the plots themselves are
# outside this part of the file)
theme_set (theme_minimal (base_size=15))
size_text <- 3.75
col_movie <- "FrenchDispatch" # wesanderson palette from which the colours below are picked
wes_palette (col_movie) # top-level call, so the palette object is printed (presumably displayed as a plot)
col_paso <- wes_palette (col_movie)[1] # presumably the colour for primary (PASO) elections
col_gen <- wes_palette (col_movie)[2] # presumably the colour for general elections
col_concurrent <- "gray67"
col_density_hist <- wes_palette (col_movie)[3]
shade_density_ci <- 0.4
shade_density_hist <- 0.25
alpha_values <- 0.125
jitter_w <- 1/5
jitter_h <- 5
size_tsline <- 1.1

# RD plots
# NOTE(review): col_bin reuses palette entry 2, the same entry as col_gen
col_cutoff <- "black"
col_bin <- wes_palette (col_movie)[2]
col_line <- wes_palette (col_movie)[4]
alpha_bin <- 1
place_text_x <- -42
place_text_y <- 95
size_bin <- 0.85




#############################################
#############################################
########## (1) Importing the data ###########
#############################################
#############################################

##### (1.1) By municipality-year ----

# full dataset
# Reads the wide municipality-year file (relative to the working directory) and
# derives the outcome variables used later on. Naming conventions visible below:
#   *_paso / *_general : value in the primary / general election
#   *_classif_paso     : primary value taken from the *_classif_paso input
#                        columns (per the comments further down in this file,
#                        based on parties that classified to the general)
#   *_log_*            : natural log of the corresponding level
#   *_delta            : general minus primary (for logs: log of the ratio
#                        general / primary, or a difference of logs)
# municipio and inc_party are converted to factors.
bymun_full <- read_csv (
  "Elecciones PBA (by muni) 1983-2023 - Wide.csv") %>% 
  mutate (
    municipio = factor (municipio)
    , inc_party = factor (inc_party)
    
    ## turnout and positive votes
    # note: the *_log_paso sums/margins here use the raw partido_*_paso columns,
    # whereas the *_delta variables below use the *_classif_paso columns
    , turnout_log_paso = log (turnout_paso)
    , turnout_log_general = log (turnout_general)
    , positive_log_paso = log (positive_paso)
    , positive_log_general = log (positive_general)
    , sum_first2_log_paso = log (partido_1_paso + partido_2_paso)
    , sum_first2_log_general = log (partido_1_general + partido_2_general)
    , partido_golosov_log_general = log (partido_golosov_general)
    , partido_golosov_log_classif_paso = log (partido_golosov_classif_paso)
    , partido_enpv_log_general = log (partido_enpv_general)
    , partido_enpv_log_classif_paso = log (partido_enpv_classif_paso)
    , turnout_pct_delta = turnout_pct_general - turnout_pct_paso
    , turnout_log_delta = log (turnout_general / turnout_paso)
    , positive_pct_delta = positive_pct_general - positive_pct_paso ## in this case, classif votes REDUCE the number of positive votes in the primary, making it more likely to find a larger increase in the general election
    , positive_log_delta = log (positive_general / positive_paso)
    
    ## number of parties contesting
    # the *_log_delta columns reuse the log columns created earlier in this same
    # mutate() call, so the order of the assignments matters
    , n_parties_log_paso = log (n_parties_paso)
    , n_parties_log_general = log (n_parties_general)
    , n_parties_delta = n_parties_general - n_parties_paso
    , n_parties_log_delta = n_parties_log_general - n_parties_log_paso
    , partido_golosov_delta = partido_golosov_general - partido_golosov_classif_paso
    , partido_golosov_log_delta = partido_golosov_log_general - partido_golosov_log_classif_paso
    , partido_enpv_delta = partido_enpv_general - partido_enpv_classif_paso
    , partido_enpv_log_delta = partido_enpv_log_general - partido_enpv_log_classif_paso
    
    ## parties by ranking
    # partido_k_* refers to the party ranked k-th (k = 1..4)
    , partido_1_pct_delta = partido_1_pct_general - partido_1_pct_classif_paso
    , partido_1_log_delta = log (partido_1_general / partido_1_classif_paso)
    , partido_2_pct_delta = partido_2_pct_general - partido_2_pct_classif_paso
    , partido_2_log_delta = log (partido_2_general / partido_2_classif_paso)
    , partido_3_pct_delta = partido_3_pct_general - partido_3_pct_classif_paso
    , partido_3_log_delta = log (partido_3_general / partido_3_classif_paso)
    , partido_4_pct_delta = partido_4_pct_general - partido_4_pct_classif_paso
    , partido_4_log_delta = log (partido_4_general / partido_4_classif_paso)
    
    ## margins
    # log margins are the log of the vote ratio between adjacent ranks
    , margin_12_pct_delta = margin_12_pct_general - margin_12_pct_classif_paso
    , margin_12_log_paso = log (partido_1_paso/partido_2_paso)
    , margin_12_log_general = log (partido_1_general/partido_2_general)
    , margin_23_pct_delta = margin_23_pct_general - margin_23_pct_classif_paso
    , margin_23_log_paso = log (partido_2_paso/partido_3_paso)
    , margin_23_log_general = log (partido_2_general/partido_3_general)
    
    ## sum of first N parties
    , sum_first2_pct_delta = sum_first2_pct_general - sum_first2_pct_classif_paso
    , sum_first2_log_delta = log ((partido_1_general + partido_2_general) / (partido_1_classif_paso + partido_2_classif_paso))
    , sum_first3_pct_delta = sum_first3_pct_general - sum_first3_pct_classif_paso
    , sum_first3_log_delta = log ((partido_1_general + partido_2_general + partido_3_general) / (partido_1_classif_paso + partido_2_classif_paso + partido_3_classif_paso))
    , sum_first4_pct_delta = sum_first4_pct_general - sum_first4_pct_classif_paso
    , sum_first4_log_delta = log ((partido_1_general + partido_2_general + partido_3_general + partido_4_general) / (partido_1_classif_paso + partido_2_classif_paso + partido_3_classif_paso + partido_4_classif_paso))
    
    ## number of council seats by party ranking
    # concejales_mgXY_* = seat difference between the parties ranked X and Y
    , concejales_mg12_general = concejales_1_general - concejales_2_general
    , concejales_mg23_general = concejales_2_general - concejales_3_general
    , concejales_mg34_general = concejales_3_general - concejales_4_general
    , concejales_mg12_paso = concejales_1_paso - concejales_2_paso
    , concejales_mg23_paso = concejales_2_paso - concejales_3_paso
    , concejales_mg34_paso = concejales_3_paso - concejales_4_paso
    
    , concejales_1_delta = concejales_1_general - concejales_1_paso
    , concejales_2_delta = concejales_2_general - concejales_2_paso
    , concejales_3_delta = concejales_3_general - concejales_3_paso
    , concejales_4_delta = concejales_4_general - concejales_4_paso
    , concejales_1_pct_delta = concejales_1_pct_general - concejales_1_pct_paso
    , concejales_2_pct_delta = concejales_2_pct_general - concejales_2_pct_paso
    , concejales_3_pct_delta = concejales_3_pct_general - concejales_3_pct_paso
    , concejales_4_pct_delta = concejales_4_pct_general - concejales_4_pct_paso)
# quick visual check of the imported data and its dimensions
summary (bymun_full); dim (bymun_full)

# 2011-2023 only
# Keeps the 2011-2023 rows of the municipality panel and attaches two columns
# measured in 2011 and constant within municipality: the council size
# (concejoSize_2011) and a size class (muni_size: "small" when the 2011 council
# had 14 seats or fewer, "large" otherwise).
bymun <- left_join(
  filter(bymun_full, year %in% 2011:2023),
  bymun_full %>%
    filter(year == 2011) %>%
    select(municipio, concejoSize) %>%
    mutate(
      muni_size = factor(
        ifelse(concejoSize <= 14, "small", "large"),
        levels = c("small", "large")
      ),
      concejoSize_2011 = concejoSize
    ) %>%
    select(-concejoSize), ## the yearly concejoSize already exists in the panel
  by = "municipio"
)
summary(bymun)


##### (1.2) By municipality-year, long format ----
# One row per municipality-year-election type: every column whose name ends in
# "paso" or "general" is stacked, and the suffix becomes `election_type`.
bymun_full_l <- bymun_full %>%
  rename_with(
    ## a double underscore marks the point where pivot_longer splits each name
    ~ gsub(
      "_paso", "__paso",
      gsub("_general", "__general", .x, fixed = TRUE),
      fixed = TRUE
    )
  ) %>%
  pivot_longer(
    cols = ends_with(c("paso", "general")),
    names_to = c(".value", "election_type"),
    names_sep = "__"
  ) %>%
  ## drop the primary rows of years before 2011
  filter(election_type != "paso" | year >= 2011) %>%
  mutate(
    election_type = factor(election_type, levels = c("paso", "general")),
    ## primaries are placed a third of a year before, generals a third after
    year_type = ifelse(election_type == "paso", year - 1/3, year + 1/3)
  )
summary(bymun_full_l)
dim(bymun_full_l)



##### (1.3) By party-municipality-year (long version) ----

# full dataset
# Party-level results in long format (see the section title: party-municipality-
# year, long version). Identifier columns are stored as factors and the
# municipality size class, measured in 2011 (small = council of 14 seats or
# fewer), is attached by municipality.
byparty_full_l <- read_csv (
  "Elecciones PBA (by party) 1983-2023 - Long.csv") %>% 
  mutate (
    election_type = factor (election_type, levels=c ("paso", "general"))
    , municipio = factor (municipio)
    , ref_party = factor (ref_party)
    , partyName = factor (partyName)
    , partyFactionName = factor (partyFactionName)
    , inc_party = factor (inc_party)) %>% ## converted once (a second, identical conversion was redundant)
  left_join (
    bymun_full %>% 
      filter (year==2011) %>% 
      select (municipio, concejoSize) %>% 
      mutate (
        muni_size = ifelse (concejoSize <= 14, "small", "large")
        , muni_size = factor (muni_size, levels=c ("small", "large"))
        , concejoSize = NULL) ## only muni_size is carried over
    , by=c ("municipio" = "municipio"))
summary (byparty_full_l); dim (byparty_full_l)

# 2011-2023 & qualified parties only
# party_in_general==1 keeps the parties that ran in the general election
byparty_l <- byparty_full_l %>% 
  filter (year %in% 2011:2023 & party_in_general==1)
nrow (byparty_l) ## 8,690



##### (1.4) By party-municipality-year (wide version) ----

# full dataset
# Party-level results in wide format (see the section title). Three variables
# are recoded before the RD analysis, identifier columns are stored as factors,
# and the municipality size class measured in 2011 (small = council of 14 seats
# or fewer) is attached by municipality.
byparty_full <- read_csv (
  "Elecciones PBA (by party) 1983-2023 - Wide.csv") %>% 
  mutate (
    
    ## updating some variables -> we'll need this for the RD analysis
    winner_general = case_when (
      year >= 2011 & party_in_general==0 ~ 0 ## a handful of parties that finished second or third in the primary dropped (eg, Exaltacion de la Cruz 2011). We need to account for them in the RD analysis
      , year < 2011 & votos_partido_general == partido_1_general ~ 1 ## lots of NA's here -> we'll need to get rid of them for the lags
      , year < 2011 & votos_partido_general < partido_1_general ~ 0
      , TRUE  ~ winner_general)
    , votos_partido_pct_general = case_when (
      year >= 2011 & party_in_general==0 ~ 0 ## ditto
      , year < 2011 ~ votos_partido_general / positive_general * 100
      , TRUE ~ votos_partido_pct_general)
    , rank_partido_classif_paso = case_when ( ## tie for the 3rd place. We manually code the Frente Renovador as the third placed party because (a) it was clearly a more important party at the province level; and (b) it was the third most voted party in the general election
      municipio=="CARLOS CASARES" & year == 2013 & partyName == "FRENTE RENOVADOR" ~ 3
      , municipio=="CARLOS CASARES" & year == 2013 & partyName == "UNION POPULAR" ~ 4
      , TRUE ~ rank_partido_classif_paso)
    ## the factor conversions come after the recodes above, which compare
    ## municipio and partyName against string literals
    , municipio = factor (municipio)
    , ref_party = factor (ref_party)
    , partyName = factor (partyName)
    , partyFactionName = factor (partyFactionName)
    , inc_party = factor (inc_party)) %>% ## converted once (a second, identical conversion was redundant)
  left_join (
    bymun_full %>% 
      filter (year==2011) %>% 
      select (municipio, concejoSize) %>% 
      mutate (
        muni_size = ifelse (concejoSize <= 14, "small", "large")
        , muni_size = factor (muni_size, levels=c ("small", "large"))
        , concejoSize = NULL) ## only muni_size is carried over
    , by=c ("municipio" = "municipio"))
summary (byparty_full); dim (byparty_full)

# 2011-2023 only
# party_in_general==1 keeps the parties that ran in the general election
byparty <- byparty_full %>% 
  filter (year %in% 2011:2023 & party_in_general==1)
nrow (byparty) ## 4,345



##### (1.5) By party-municipality-year (provisional results, different levels) ----
# Provisional results; `level`, `office` and `type` are stored as factors, with
# an explicit level order for `level` and `type`.
byparty_mesa <- read_csv("Elecciones PBA (by party) 2011-2023 - Provisional.csv") %>%
  mutate(
    level = factor(level, levels = c("local", "provincial", "national")),
    office = factor(office),
    type = factor(type, levels = c("paso", "general"))
  )
summary(byparty_mesa)




#################################################
#################################################
########## (2) Descriptive statistics ###########
#################################################
#################################################

#############################################
##### (2.1) Numbers reported in the text ----
#############################################

### (2.1.1) Time between primary and general ----
# one row per distinct (year, general date, primary date) combination, with the
# gap between the two dates
bymun %>%
  select(year, fecha_general, fecha_paso) %>%
  unique() %>%
  mutate(distance = fecha_general - fecha_paso)
# 63 to 77 days, 9 to 11 weeks (actually 10 to 11; 2021 is the only with 9)


### (2.1.2) % of (mayoral) elections won by third parties ----
# The indicators are coded 0/100 so that the means reported by summary() read
# directly as percentages. `x %in% 1` is TRUE only when x is non-missing and
# equal to 1, so a missing rank counts as "did not finish first".

# 1983-2023
bymun_full %>%
  filter(midterm == 0) %>%
  select(municipio, year, rank_pj_general, rank_ucr_general, partido_1_pct_general) %>%
  mutate(
    win_third = if_else(rank_pj_general %in% 1 | rank_ucr_general %in% 1, 0, 100),
    pj_first = if_else(rank_pj_general %in% 1, 100, 0),
    ucr_first = if_else(rank_ucr_general %in% 1, 100, 0)
  ) %>%
  summary()
## 52.0% PJ; 37.1% UCR; 10.9% others

# 2011-2023
bymun_full %>%
  filter(midterm == 0, year %in% c(2011, 2015, 2019, 2023)) %>%
  select(municipio, year, rank_pj_general, rank_ucr_general, partido_1_pct_general) %>%
  mutate(
    win_third = if_else(rank_pj_general %in% 1 | rank_ucr_general %in% 1, 0, 100),
    pj_first = if_else(rank_pj_general %in% 1, 100, 0),
    ucr_first = if_else(rank_ucr_general %in% 1, 100, 0),
    ## winner obtained more than half of the vote
    maj_general = if_else(partido_1_pct_general > 50, 100, 0)
  ) %>%
  summary()
## 56.1% PJ; 35.9% UCR; 8.0% others



### (2.1.3) Number of registered voters in median municipality ----

# smallest municipality in 2011:
# (distinct 2011 register sizes, in increasing order)
bymun %>%
  filter(year == 2011) %>%
  pull(padron_general) %>%
  unique() %>%
  sort()

# La Matanza and Lomas 2011:
bymun %>%
  filter(municipio %in% c("LA MATANZA", "LOMAS DE ZAMORA")) %>%
  select(municipio, year, padron_general) %>%
  unique()
## 890K and 451K in 2011

# range, median and mean values:
# (per year, in thousands of registered voters)
bymun %>%
  group_by(year) %>%
  summarise(
    padron_median = median(padron_general) / 1000,
    padron_mean = mean(padron_general) / 1000,
    padron_min = min(padron_general) / 1000,
    padron_max = max(padron_general) / 1000
  )
# median: between 25K and 32K; mean: between 83K and 104K

# Conurbano vs interior
# (share of each year's total register by value of `conurbano`)
bymun %>%
  group_by(year) %>%
  mutate(padron_total = sum(padron_general) / 1000) %>%
  group_by(conurbano, year, padron_total) %>%
  summarise(padron_sum = sum(padron_general) / 1000) %>%
  mutate(padron_pct = padron_sum / padron_total * 100) %>%
  unique()



### (2.1.4) Actual competition within primaries ----

## % of parties that feature multiple lists
# (share of primary rows with more than one faction)
mean(filter(byparty_full_l, election_type == "paso")$n_faccions > 1) * 100 ## 23.3%

# conditional on there being multiple lists, the winner's margin is of...
mean(
  filter(byparty_full_l, election_type == "paso", n_faccions > 1)$faccion_mg >= 10
) * 100 ## 10pp or more: 75.98%
mean(
  filter(byparty_full_l, election_type == "paso", n_faccions > 1)$faccion_mg >= 25
) * 100 ## 25pp or more: 48.89%
mean(
  filter(byparty_full_l, election_type == "paso", n_faccions > 1)$faccion_mg >= 50
) * 100 ## 50pp or more: 21.4%


## frequency of the top-2 factions belonging to the same party
# Overview: (1) reshape the 2011-2023 primary rows so that each party contributes
# one row per faction rank (its 1st and 2nd faction); (2) convert within-party
# faction shares into shares of the overall vote; (3) rank all factions within
# each municipality-year and record which party the top factions belong to;
# (4) collapse to one row per municipality-year and summarise.

# we first have to manipulate the data a bit
byfaction <- byparty_full_l %>% 
  filter (election_type == "paso") %>% 
  filter (year %in% 2011:2023) %>% 
  select ( ## the ranges below depend on the column order of the input file
    election_type:year, partyName, pjoficial:other, n_parties, n_faccions
    , rank_partido, rank_partido_classif, votos_partido:votos_partido_pct_classif
    , margin_12_pct:margin_23_pct, margin_12_pct_classif:margin_23_pct_classif
    , faccion_1_pct:faccion_mg) %>% 
  pivot_longer ( ## to get %'s for the two largest factions within each party
    cols = c (faccion_1_pct, faccion_2_pct)
    , names_to = "faction_rank"
    , values_to = "votos_faction_within") %>% 
  mutate ( ## % votes for the two largest factions
    single_faction = if_else (n_faccions==1, 1, 0)
    , faction_rank = if_else (faction_rank=="faccion_1_pct", 1, 2)
    # single-faction parties: the only faction gets the party's full share and
    # the (non-existent) second faction is set to NA; otherwise the faction's
    # within-party share is rescaled by the party's share
    , votos_faction_pct = case_when (
      n_faccions == 1 & faction_rank == 1 ~ votos_partido_pct
      , n_faccions == 1 & faction_rank == 2 ~ NA_real_
      , n_faccions > 1 ~ votos_partido_pct * (votos_faction_within/100))
    , votos_faction_pct_classif = case_when (
      n_faccions == 1 & faction_rank == 1 ~ votos_partido_pct_classif
      , n_faccions == 1 & faction_rank == 2 ~ NA_real_
      , n_faccions > 1 ~ votos_partido_pct_classif * (votos_faction_within/100)
    )) %>% 
  group_by (municipio, year) %>% ## party ID's of the largest party & faction
  mutate (
    # NOTE(review): rank() keeps NA inputs as NA by default (na.last = "keep"),
    # so the NA second-faction rows created above have an NA rank. The logical
    # subscripts below (e.g. rank_faction_overall == 1) then contain NA, which
    # yields NA elements, and first() returns NA whenever such a row precedes
    # the matching one within the municipality-year. This looks like the source
    # of the NAs that are dropped with na.rm / !is.na() further down -- confirm
    # that this is intended (wrapping the condition in which() would avoid it).
    rank_faction_overall = rank (-votos_faction_pct, ties.method="min")
    , rank_faction_overall_classif = rank (-votos_faction_pct_classif, ties.method="min")
    
    ## identifying the party ID of the largest party and faction
    , party_1_id = first (partyName[rank_partido == 1])
    , party_1_id_classif = first (partyName[rank_partido_classif == 1])
    , faction_1_id = first (partyName[rank_faction_overall == 1])
    , faction_2_id = first (partyName[rank_faction_overall == 2])
    , faction_1_id_classif = first (partyName[rank_faction_overall_classif == 1])
    , faction_2_id_classif = first (partyName[rank_faction_overall_classif == 2])
    
    ## largest faction not from largest party
    # the *_2 columns are the difference between the plain and the classif
    # versions of each indicator (0 when both agree)
    , party_faction_diff = if_else (party_1_id != faction_1_id, 1, 0)
    , party_faction_diff_classif = if_else (party_1_id_classif != faction_1_id_classif, 1, 0)
    , party_faction_diff_2 = party_faction_diff - party_faction_diff_classif
    
    ## two largest factions belong to largest party
    # NOTE(review): the code checks that the two top factions share a party
    # (faction_1_id == faction_2_id); it does not compare against party_1_id
    , party_faction_both = if_else (faction_1_id == faction_2_id, 1, 0)
    , party_faction_both_classif = if_else (faction_1_id_classif == faction_2_id_classif, 1, 0)
    , party_faction_both_2 = party_faction_both - party_faction_both_classif) %>% 
  ungroup ()
summary (byfaction) ## there are no differences using classif or not (there shouldn't be, actually). We focus on the non-classif cases

# doing the calculations
# all retained columns are constant within municipality-year (group-level values
# computed above, plus margins that are presumably municipality-level), so
# unique() leaves one row per municipality-year
byfaction <- byfaction %>% ## municipality-year combinations
  select (municipio, year, margin_12_pct:margin_23_pct_classif
          , party_1_id:party_faction_both_classif) %>% 
  unique ()

# share of municipality-years (with a non-missing indicator) in which the two
# largest factions come from the same party, and mean margins by that indicator
mean (byfaction$party_faction_both, na.rm=TRUE)*100 ## 7%
byfaction %>% 
  filter (!is.na (party_faction_both)) %>% 
  group_by (party_faction_both) %>% 
  summarise (
    n = n()
    , margin_12_pct = mean (margin_12_pct, na.rm=TRUE)
    , margin_23_pct = mean (margin_23_pct, na.rm=TRUE))



### (2.1.5) Difference in turnout between primary and general ----

## differences in turnout

# by year -> higher in concurrent years
bymun %>%
  group_by(year) %>%
  select(turnout_pct_paso, turnout_pct_general) %>%
  mutate(turnout_diff = turnout_pct_general - turnout_pct_paso) %>%
  summarise(mean(turnout_diff))

# all periods -> 4.6pp
bymun %>%
  mutate(turnout_diff = turnout_pct_general - turnout_pct_paso) %>%
  pull(turnout_diff) %>%
  mean(na.rm = TRUE)


## changes in the number of REGISTERED voters
# cross-tabulation of the absolute change in the register by year
bymun %>%
  mutate(padron_diff = padron_general - padron_paso) %>%
  with(table(padron_diff, year, useNA = "ifany"))
## 2017 and 2019 -> no more differences

# same change, expressed as a percentage of the primary register
bymun %>%
  mutate(
    padron_diff = padron_general - padron_paso,
    padron_diff_pct = padron_diff / padron_paso * 100
  ) %>%
  pull(padron_diff_pct) %>%
  summary()
## differences are small in relative terms, between -0.72% and 1.04% of registered voters in the primary



### (2.1.6) % votes parties that don't pass the threshold ----
# per municipality-year, add up the primary vote share of the parties with
# classif_party == 0, then average those totals
byparty_full_l %>%
  filter(election_type == "paso", classif_party == 0) %>%
  group_by(municipio, year) %>%
  summarise(votos_partido_pct = sum(votos_partido_pct)) %>%
  ungroup() %>%
  pull(votos_partido_pct) %>%
  mean()
## 2.28%  



### (2.1.7) % dropping voluntarily ----
# counts and share are computed over the primary rows only
sum(filter(byparty_full_l, election_type == "paso")$dropped, na.rm = TRUE) ## 81 dropped...
sum(filter(byparty_full_l, election_type == "paso")$classif_party, na.rm = TRUE) ## out of 4,421 parties that qualified for the general election
mean(
  filter(byparty_full_l, election_type == "paso", classif_party == 1)$dropped,
  na.rm = TRUE
) * 100 ## 1.8%



### (2.1.8) correlation between Golosov and ENPV ----
# each call prints the 2x2 correlation matrix of the two indices, rounded to
# two decimals

# general elections, 1983-2023: 0.96
bymun_full %>%
  select(partido_golosov_general, partido_enpv_general) %>%
  cor() %>%
  round(2)

# general elections, 2011-2023: 0.95
bymun_full %>%
  filter(year >= 2011) %>%
  select(partido_golosov_general, partido_enpv_general) %>%
  cor() %>%
  round(2)

# primaries (parties that classified to the general only), 2011-2023: 0.96
bymun_full %>%
  filter(year >= 2011) %>%
  select(partido_golosov_classif_paso, partido_enpv_classif_paso) %>%
  cor() %>%
  round(2)

# primaries (all parties), 2011-2023: 0.97
bymun_full %>%
  filter(year >= 2011) %>%
  select(partido_golosov_paso, partido_enpv_paso) %>%
  cor() %>%
  round(2)



### (2.1.9) quantile values of margin of victory in the primary ----
# selected quantiles of the primary margin between the first and second party,
# separately for each value of `midterm`
bymun %>%
  group_by(midterm) %>%
  summarise(
    n = n(),
    margin_12_q05 = quantile(margin_12_pct_classif_paso, probs = 0.05),
    margin_12_q10 = quantile(margin_12_pct_classif_paso, probs = 0.10),
    margin_12_q25 = quantile(margin_12_pct_classif_paso, probs = 0.25),
    margin_12_q50 = quantile(margin_12_pct_classif_paso, probs = 0.50),
    margin_12_q75 = quantile(margin_12_pct_classif_paso, probs = 0.75),
    margin_12_q90 = quantile(margin_12_pct_classif_paso, probs = 0.90),
    margin_12_q95 = quantile(margin_12_pct_classif_paso, probs = 0.95)
  )



### (2.1.10) proportion of general elections decided by margins of 1, 2.5, 5, 10 and 25pp ----
# rows with midterm == 0 only; each line is the % of those rows whose general
# election margin falls below the threshold
mean(filter(bymun, midterm == 0)$margin_12_pct_general < 1) * 100 ## 3.7%
mean(filter(bymun, midterm == 0)$margin_12_pct_general < 2.5) * 100 ## 9.3%
mean(filter(bymun, midterm == 0)$margin_12_pct_general < 5) * 100 ## 18.7%
mean(filter(bymun, midterm == 0)$margin_12_pct_general < 10) * 100 ## 35.2%
mean(filter(bymun, midterm == 0)$margin_12_pct_general < 25) * 100 ## 71.7%



### (2.1.11) overlapping between Conurbano vs Interior and small vs large municipalities ----
# row percentages (margin = 1) of the two-way tables, NAs shown if present

# % Conurbano vs interior, conditional on size
prop.table(
  with(bymun, table(muni_size, conurbano, useNA = "ifany")),
  margin = 1
) * 100

# % small vs large, conditional on location
prop.table(
  with(bymun, table(conurbano, muni_size, useNA = "ifany")),
  margin = 1
) * 100




##############################################
##### (2.2) Tables with descriptive stats ----
##############################################

### (2.2.1) By municipality-election (2011-2023) ----

## (2.2.1.1) preparing the data
# Builds the body of the descriptive-statistics table: one row per election type
# (primary / general / delta) and variable, and one set of N, mean, sd, min, max
# columns per subsample (calendar: midterm == 0 vs 1; geography: Conurbano vs
# Interior). All statistics end up as formatted character strings.
tab_desc_muni <- bymun %>% 
  filter (year >= 2011) %>% # bymun is already restricted to 2011-2023 when it is created
  select ( ## variables for which we will report descriptive stats
    midterm, conurbano
    , margin_12_pct_classif_paso, margin_12_pct_general, margin_12_pct_delta
    , margin_23_pct_classif_paso, margin_23_pct_general, margin_23_pct_delta
    , turnout_pct_paso, turnout_pct_general, turnout_pct_delta
    , positive_classif_pct_paso, positive_pct_general, positive_pct_delta
    , partido_golosov_classif_paso, partido_golosov_general, partido_golosov_delta
    , sum_first2_pct_classif_paso, sum_first2_pct_general, sum_first2_pct_delta
    , sum_first3_pct_classif_paso, sum_first3_pct_general, sum_first3_pct_delta
    , partido_1_pct_classif_paso, partido_1_pct_general, partido_1_pct_delta
    , partido_2_pct_classif_paso, partido_2_pct_general, partido_2_pct_delta
    , partido_3_pct_classif_paso, partido_3_pct_general, partido_3_pct_delta) %>% 
  ## replicating three times: full sample; midterm vs. concurrent; and Conurbano vs. Interior
  slice (rep (1:n(), each = 3)) %>%
  mutate ( ## adding replication factor with custom labels
    # each original row now appears three times in a row, so cycling the three
    # labels assigns one copy to each sample
    sample = rep (c ("full", "calendar", "geography"), n()/3)
    , sample = factor (sample, levels=c ("full", "calendar", "geography"))) %>% 
  pivot_longer ( # the range relies on the column order fixed by select() above
    cols = margin_12_pct_classif_paso:partido_3_pct_delta
    , names_to = "varname"
    , values_to = "value") %>% 
  filter (!is.na (value)) %>% 
  mutate (
    # election type is read off the variable name before the suffix is removed
    ele_type = case_when (
      grepl ("_paso", varname) ~ "primary"
      , grepl ("_general", varname) ~ "general"
      , grepl ("_delta", varname) ~ "delta")
    , ele_type = factor (ele_type, levels=c ("primary", "general", "delta"))
    # the patterns are listed from most to least specific; the stand-alone
    # "_classif" entry is needed for names where "_classif" is not directly
    # followed by "_paso" (positive_classif_pct_paso)
    , varname = str_replace_all (
      varname, c (
        "_classif_paso" = ""
        , "_classif" = ""
        , "_paso" = ""
        , "_general" = ""
        , "_delta" = ""))
    , varname = factor (varname)
    # "left"/"right" is the position of the subsample within its pair of panels
    , subsample = case_when (
      sample == "full" ~ "only"
      , sample == "calendar" & midterm == 0 ~ "left"
      , sample == "calendar" & midterm == 1 ~ "right"
      , sample == "geography" & conurbano == "Conurbano" ~ "left"
      , sample == "geography" & conurbano == "Interior" ~ "right")
    , subsample = factor (subsample, levels=c ("only", "left", "right"))
    # LaTeX row labels; the "[1.0ex]" prefixes presumably attach to the line
    # break of the preceding table row to add vertical space -- confirm in the
    # compiled table
    , varname_full = case_when (
      varname=="margin_12_pct" ~ "margin 1st vs 2nd (\\%)"
      , varname=="margin_23_pct" ~ "margin 2nd vs 3rd (\\%)"
      , varname=="turnout_pct" ~ "[1.0ex] turnout (\\%)"
      , varname=="positive_pct" ~ "positive votes (\\%)"
      , varname=="partido_golosov" ~ "[1.0ex] Golosov index"
      , varname=="sum_first2_pct" ~ "vote 1st + 2nd (\\%)"
      , varname=="sum_first3_pct" ~ "vote 1st + 2nd + 3rd (\\%)"
      , varname=="partido_1_pct" ~ "[1.0ex] vote 1st (\\%)"
      , varname=="partido_2_pct" ~ "vote 2nd (\\%)"
      , varname=="partido_3_pct" ~ "vote 3rd (\\%)")
    # the factor levels fix the row order of the table
    , varname_full = factor (varname_full, levels=c (
      "margin 1st vs 2nd (\\%)", "margin 2nd vs 3rd (\\%)"
      , "[1.0ex] turnout (\\%)", "positive votes (\\%)"
      , "[1.0ex] Golosov index", "vote 1st + 2nd (\\%)", "vote 1st + 2nd + 3rd (\\%)"
      , "[1.0ex] vote 1st (\\%)", "vote 2nd (\\%)", "vote 3rd (\\%)"))) %>% 
  group_by (sample, subsample, ele_type, varname_full) %>% 
  summarise (
    # numeric statistics first ...
    n = n()
    , mean = mean (value)
    , sd = sd (value)
    , min = min (value)
    , max = max (value)
    # ... then each one is overwritten by its formatted string; inside the
    # calls, the bare names n / mean / sd / min / max refer to the columns
    # created just above
    , n = sprintf ("%.0f", round (n, 0))
    , mean = sprintf ("%.1f", round (mean, 1))
    , sd = sprintf ("%.1f", round (sd, 1))
    , min = sprintf ("%.1f", round (min, 1))
    , max = sprintf ("%.1f", round (max, 1))) %>% 
  arrange (sample, subsample, ele_type, varname_full) %>% 
  filter (sample != "full") %>% # the full-sample statistics are computed but not kept in the table
  pivot_wider (
    id_cols = c (ele_type, varname_full)
    , names_from = c (subsample, sample)
    , values_from = n:max
    , names_vary = "slowest") # keeps the five statistics of each subsample in adjacent columns


## (2.2.1.2) creating and exporting the table
# LaTeX fragments injected through xtable's add.to.row mechanism: column-group
# header, statistic names (which also open panel a), the titles of panels b and
# c, and the closing rule.
Header1 <- "\\toprule & \\multicolumn{5}{c}{\\textbf{concurrent elections}} & & \\multicolumn{5}{c}{\\textbf{midterm elections}} & & \\multicolumn{5}{c}{\\textbf{{\\it Conurbano}}} & & \\multicolumn{5}{c}{\\textbf{Interior}} \\\\ \\cmidrule{2-6} \\cmidrule{8-12} \\cmidrule{14-18} \\cmidrule{20-24} \n"
Header2 <- "\\multicolumn{1}{l}{\\textbf{(a) Primary election}} & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} & & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} & & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} & & \\multicolumn{1}{c}{$N$} & \\multicolumn{1}{c}{mean} & \\multicolumn{1}{c}{\\textsc{sd}} & \\multicolumn{1}{c}{min} & \\multicolumn{1}{c}{max} \\\\ \\midrule \n"
Header3 <- "[2.0ex] \\multicolumn{24}{l}{\\textbf{(b) General election}} \\\\ \\midrule \n"
Header4 <- "[2.0ex] \\multicolumn{12}{l}{\\textbf{(c) $\\Delta$: General $-$ Primary}} \\\\ \\midrule \n"
Bottom1 <- "\\bottomrule \n"

# row positions after which each fragment is inserted: both headers before the
# first row, then after rows 10, 20 and 30
addtorow <- list(
  pos = list(0, 0, 10, 20, 30),
  command = c(Header1, Header2, Header3, Header4, Bottom1)
)

# columns 2-7 are the row label plus the first block of five statistics; an
# empty spacer column separates each of the remaining three blocks
print(
  xtable(
    bind_cols(
      tab_desc_muni[, 2:7], "",
      tab_desc_muni[, 8:12], "",
      tab_desc_muni[, 13:17], "",
      tab_desc_muni[, 18:22]
    ),
    align = c(
      "l", "l", "r", "r", "r", "r", "r", "c", "r", "r", "r", "r", "r",
      "c", "r", "r", "r", "r", "r", "c", "r", "r", "r", "r", "r"
    ),
    digits = 0,
    caption = "Descriptive statistics (\\textsc{i}): Municipality-level outcomes, 2011-2023",
    label = "T:DescMuni"
  ),
  sanitize.text.function = function(x) {x}, ## cell text is already valid LaTeX
  floating = TRUE,
  table.placement = "t",
  caption.placement = "top",
  latex.environments = "center",
  size = "footnotesize",
  include.colnames = FALSE,
  include.rownames = FALSE,
  hline.after = NULL,
  add.to.row = addtorow
)




#############################################
##### (2.3) Plots with descriptive stats ----
#############################################

### (2.3.1) Pivoting longer again to get one row per municipality-year-type-outcome ----
# Starting from the municipality-year-election type data, this harmonises eight
# outcomes so that general-election rows use the plain columns and primary rows
# use the *_classif columns, and then stacks them into `outcome` /
# `outcome_value`, with a human-readable label in `outcome_full`.
bymun_full_l2 <- bymun_full_l %>% 
  mutate ( ## primary values should be based on parties that CLASSIFIED to the general election
    # when there is no second party the margin is set to 100
    margin_12_pct = case_when (
      election_type == "general" & is.na (partido_2_pct) ~ 100
      , election_type == "general" & !is.na (partido_2_pct) ~ partido_1_pct - partido_2_pct
      , election_type == "paso" & is.na (partido_2_pct_classif) ~ 100
      , election_type == "paso" & !is.na (partido_2_pct_classif) ~ partido_1_pct_classif - partido_2_pct_classif)
    # when there is no third party the margin is the second party's own share
    , margin_23_pct = case_when (
      election_type == "general" & is.na (partido_3_pct) ~ partido_2_pct
      , election_type == "general" & !is.na (partido_3_pct) ~ partido_2_pct - partido_3_pct
      , election_type == "paso" & is.na (partido_3_pct_classif) ~ partido_2_pct_classif
      , election_type == "paso" & !is.na (partido_3_pct_classif) ~ partido_2_pct_classif - partido_3_pct_classif)
    # when there is no second party the sum is the first party's share
    , sum_first2_pct = case_when (
      election_type == "general" & is.na (partido_2_pct) ~ partido_1_pct
      , election_type == "general" & !is.na (partido_2_pct) ~ partido_1_pct + partido_2_pct
      , election_type == "paso" & is.na (partido_2_pct_classif) ~ partido_1_pct_classif
      , election_type == "paso" & !is.na (partido_2_pct_classif) ~ partido_1_pct_classif + partido_2_pct_classif)
    , golosov = case_when (
      election_type == "general" ~ partido_golosov
      , election_type == "paso" ~ partido_golosov_classif)
    , enpv = case_when (
      election_type == "general" ~ partido_enpv
      , election_type == "paso" ~ partido_enpv_classif)
    # overwrites the existing votos_inc_pct on primary rows with the classif version
    , votos_inc_pct = case_when (
      election_type == "general" ~ votos_inc_pct
      , election_type == "paso" ~ votos_inc_pct_classif)) %>% 
  pivot_longer (
    cols = c (
      margin_12_pct, margin_23_pct, turnout_pct, positive_pct, sum_first2_pct, golosov, enpv, votos_inc_pct)
    , names_to = "outcome", values_to = "outcome_value") %>% 
  filter (!is.na (outcome_value)) %>% 
  mutate (
    # the last three levels (partido_1_pct to partido_3_pct) are declared but are
    # not among the pivoted columns, so they stay empty here
    outcome = factor (outcome, levels=c (
      "margin_12_pct", "margin_23_pct", "turnout_pct", "positive_pct", "sum_first2_pct", "golosov", "enpv",  "votos_inc_pct", "partido_1_pct", "partido_2_pct", "partido_3_pct"))
    , outcome_full = case_match (
      outcome
      , "margin_12_pct" ~ "margin first vs. second (%)"
      , "margin_23_pct" ~ "margin second vs. third (%)"
      , "turnout_pct" ~ "turnout (%)"
      , "positive_pct" ~ "positive votes (%)"
      , "sum_first2_pct" ~ "votes first + second (%)"
      , "golosov" ~ "Golosov index"
      , "enpv" ~ "Effective Number of Parties"
      , "votos_inc_pct" ~ "vote incumbent (%)")
    , outcome_full = factor (outcome_full, levels=c (
      "margin first vs. second (%)", "margin second vs. third (%)", "turnout (%)", "positive votes (%)", "votes first + second (%)", "Golosov index", "Effective Number of Parties", "vote incumbent (%)"))) %>% 
  ungroup () # presumably a safeguard: no group_by() is applied in this pipeline
summary (bymun_full_l2)



### (2.3.2) Proportion of parties dropping out and/or participating in the general election ----

## creating the dataset
## Party-level primary ("paso") results since 2011, stacked twice:
## (i) by sample (full vs. concurrent / midterm elections), and
## (ii) by weighting scheme (all parties equal vs. weighted by vote share).
dropouts <- byparty_full_l %>%
  filter(year >= 2011, election_type == "paso") %>%
  select(
    year, midterm, classif_party, party_in_general, dropped, votos_partido_pct
  ) %>%
  mutate(
    # constant helper columns; each is stacked against a real column below
    full = 1,
    equal = 1,
    # qualification status in the primary x participation in the general
    drop_group = case_when(
      classif_party == 0 & party_in_general == 0 ~ "didn't qualify,\ndidn't run",
      classif_party == 1 & dropped == 1 ~ "qualified,\ndropped out",
      classif_party == 0 & party_in_general == 1 ~ "didn't qualify,\nran in general",
      classif_party == 1 & party_in_general == 1 ~ "qualified,\nran in general"
    ) %>%
      factor(levels = c(
        "qualified,\nran in general",
        "didn't qualify,\nran in general",
        "qualified,\ndropped out",
        "didn't qualify,\ndidn't run"
      ))
  ) %>%
  # first stack: every row once as "full" and once with its midterm indicator
  pivot_longer(
    cols = c(full, midterm),
    names_to = "sample",
    values_to = "value"
  ) %>%
  # second stack: every row once with weight 1 and once with its vote share
  pivot_longer(
    cols = c(equal, votos_partido_pct),
    names_to = "sample2",
    values_to = "weight"
  ) %>%
  mutate(
    sample = case_when(
      sample == "full" ~ "full",
      sample == "midterm" & value == 0 ~ "concurrent",
      sample == "midterm" & value == 1 ~ "midterm"
    ) %>%
      factor(levels = c("full", "concurrent", "midterm")),
    sample2 = ifelse(
      sample2 == "votos_partido_pct",
      "parties\nweighted by\nvote shares",
      "all parties\nweighted\nequally"
    ) %>%
      factor(levels = c(
        "parties\nweighted by\nvote shares", "all parties\nweighted\nequally"
      )),
    # vote shares are rescaled from percentages to proportions; equal weights stay at 1
    weight = ifelse(
      sample2 == "parties\nweighted by\nvote shares", weight / 100, 1
    )
  )
summary(dropouts)

# examining the raw data: share (%) of each drop-out group within every
# sample x weighting-scheme combination
dropouts %>%
  group_by(sample, sample2, drop_group) %>%
  summarise(sum_weights = sum(weight)) %>%
  group_by(sample, sample2) %>%
  mutate(pct = 100 * sum_weights / sum(sum_weights))
## concurrent and midterm elections differ somewhat, but the difference
## disappears once parties are weighted by their vote shares

# drawing the plot: stacked horizontal bars with the share of each drop-out
# group, full sample only, one bar per weighting scheme
(p_dropouts <- ggplot(
  dropouts %>%
    group_by(sample, sample2, drop_group) %>%
    summarise(sum_weights = sum(weight)) %>%
    group_by(sample, sample2) %>%
    mutate(pct = sum_weights / sum(sum_weights) * 100) %>%
    ungroup() %>%
    filter(sample == "full"),
  aes(x = pct, y = sample2, fill = drop_group)
) +
  geom_vline(xintercept = 50, linetype = 2, colour = col_cutoff) + # 50% reference
  geom_col() +
  scale_x_continuous(
    "",
    breaks = seq(0, 100, by = 25),
    labels = str_c(seq(0, 100, by = 25), "%")
  ) +
  scale_fill_manual(
    name = "",
    values = c(
      wes_palette(col_movie)[4],
      "black",
      wes_palette(col_movie)[2],
      wes_palette(col_movie)[3]
    )
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-24, -9, -9, -9)
  )
)



### (2.3.3) Outcome variables: Evolution over time ----
## Yearly means of six outcomes (lines) over the municipality-level values
## (jittered points), by election type; vertical lines mark years with midterm==0.
(p_outcomes_series <- ggplot(
  bymun_full_l2 %>%
    filter(outcome %in% c(
      "margin_12_pct", "margin_23_pct", "turnout_pct",
      "positive_pct", "sum_first2_pct", "golosov"
    )) %>%
    group_by(year, election_type, outcome_full) %>%
    summarise(outcome_value = mean(outcome_value, na.rm = TRUE)),
  aes(
    x = year, y = outcome_value,
    colour = election_type, linetype = election_type
  )
) +
  geom_vline(
    xintercept = bymun_full_l2 %>% filter(midterm == 0) %>% pull(year) %>% unique(),
    colour = col_concurrent
  ) +
  geom_line(linewidth = size_tsline) +
  # raw observations; only x is remapped (to year_type), everything else is inherited
  geom_jitter(
    data = bymun_full_l2 %>%
      filter(outcome %in% c(
        "margin_12_pct", "margin_23_pct", "turnout_pct",
        "positive_pct", "sum_first2_pct", "golosov"
      )),
    mapping = aes(x = year_type),
    alpha = alpha_values, size = size_tsline / 2, width = jitter_w, height = 0
  ) +
  facet_wrap(~outcome_full, ncol = 2, scales = "free_y") +
  scale_x_continuous(
    "",
    limits = c(1983, 2023.5),
    breaks = bymun_full_l2 %>% filter(midterm == 0) %>% pull(year) %>% unique(),
    # two-digit year labels
    labels = bymun_full_l2 %>% filter(midterm == 0) %>% pull(year) %>% unique() %>% str_sub(3)
  ) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("primary election", "general election")
  ) +
  scale_linetype_discrete(
    name = "", labels = c("primary election", "general election")
  ) +
  labs(y = "") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-24, -9, -9, -9)
  )
)



### (2.3.4) Outcome variables: Correlation between rankings in primary and general ----

## building the dataset
## Transition table between a party's rank in the primary and its rank in the
## general election, for 4 reference groups (all parties, incumbent, PJ, UCR)
## x 3 samples of elections (all, concurrent, midterm).
tmp <- bind_rows(
  # step 1: one copy of the data per reference group
  byparty_full %>%
    mutate(ref = "all parties"),
  byparty_full %>%
    filter(!is.na(inc_dummy) & inc_dummy == 1) %>%
    mutate(ref = "incumbent party"),
  byparty_full %>%
    filter(!is.na(pjoficial) & pjoficial == 1) %>%
    mutate(ref = "PJ"),
  byparty_full %>%
    filter(!is.na(ucr) & ucr == 1) %>%
    mutate(ref = "UCR")
) %>%
  # step 2: every reference group under each of the three samples
  (function(stacked) {
    bind_rows(
      stacked %>%
        mutate(sample = "all elections"),
      stacked %>%
        filter(midterm == 0) %>%
        mutate(sample = "concurrent elections"),
      stacked %>%
        filter(midterm == 1) %>%
        mutate(sample = "midterm elections")
    )
  }) %>%
  # parties that ran in the general and have a primary rank, 2011-2023
  filter(
    year %in% 2011:2023 &
      party_in_general == 1 &
      !is.na(rank_partido_classif_paso)
  ) %>%
  mutate(
    ref = factor(
      ref,
      levels = c("all parties", "incumbent party", "PJ", "UCR")
    ),
    sample = factor(sample),
    # fractional ranks are floored (3.5 becomes 3, etc.)
    rank_partido_classif_paso = floor(rank_partido_classif_paso),
    rank_partido_general = floor(rank_partido_general)
  ) %>%
  # facet-level sample size, stored as a ready-made label
  group_by(ref, sample) %>%
  mutate(n_sample = str_c("N = ", n())) %>%
  # number of parties at each primary rank (denominator of the proportions)
  group_by(ref, sample, n_sample, rank_partido_classif_paso) %>%
  mutate(n_paso = n()) %>%
  ungroup() %>%
  # as a factor, so that .drop=FALSE keeps unobserved general ranks with count 0
  mutate(rank_partido_general = factor(rank_partido_general)) %>%
  group_by(
    ref, sample, n_sample, rank_partido_classif_paso, n_paso,
    rank_partido_general,
    .drop = FALSE
  ) %>%
  summarise(n_comb = n()) %>%
  ungroup() %>%
  mutate(
    # factor -> label -> number (a direct as.numeric () would return level codes)
    rank_partido_general = as.numeric(as.character(rank_partido_general)),
    prop_comb = n_comb / n_paso,
    prop_comb_text = str_c(sprintf("%.0f", round(prop_comb * 100, 0)), "%")
  )

## Heatmap of primary rank (y) vs. general-election rank (x): each tile shows
## the share of parties with a given primary rank that ended at each general
## rank, faceted by reference group (rows) and sample of elections (columns).
(p_rank_correl <- ggplot (
  data=tmp, aes (y=-rank_partido_classif_paso, x=rank_partido_general, fill=prop_comb))
  + geom_tile ()
  + geom_text (aes (label=prop_comb_text), col="white", fontface="bold", size=size_text*.6)
  ## sample-size label: drawn from one row per facet. Mapping it over the whole
  ## of `tmp` would print the same string once per tile, and the stacked copies
  ## render as heavy, jagged text. inherit.aes=FALSE is needed because the
  ## reduced dataset no longer carries the columns of the plot-level mapping.
  + geom_text (
    data=distinct (tmp, ref, sample, n_sample)
    , aes (x=2, y=-9.5, label=n_sample)
    , inherit.aes=FALSE)
  + scale_x_continuous (breaks=1:10)
  + scale_y_continuous (breaks=-10:-1, labels=10:1) ## y is negated so that rank 1 sits at the top
  + scale_fill_viridis_c (guide="none", begin=0, end=1, direction=-1, option="rocket")
  + facet_grid (ref ~ sample)
  + xlab ("rank: general election") + ylab ("rank: primary election")
  + theme (
    legend.title=element_blank ()
    , legend.box.margin=margin (-9,-9,-9,-9)))



### (2.3.5) Histograms / Density plots ----

## (2.3.5.1) Council sizes in 2011 ----
# distribution of council sizes in 2011
bymun %>%
  filter(year == 2011) %>%
  select(concejoSize) %>%
  table() ## raw values
bymun %>%
  filter(year == 2011) %>%
  pull(concejoSize) %>%
  median(na.rm = TRUE) ## 14
bymun %>%
  filter(year == 2011) %>%
  pull(concejoSize) %>%
  mean(na.rm = TRUE) ## 15.3

# bar chart: % of municipalities by number of council seats in 2011, with each
# bar labelled by its percentage and count
(p_counciln <- ggplot(
  data = bymun %>%
    filter(year == 2011) %>%
    group_by(concejoSize) %>%
    summarise(n = n()) %>%
    ungroup() %>%
    mutate(
      prop = n / sum(n) * 100,
      label = str_c(sprintf("%.1f", round(prop, 1)), "%\n(N = ", n, ")")
    ),
  mapping = aes(x = concejoSize, y = prop)
) +
  geom_col(fill = col_paso) +
  geom_text(
    aes(label = label),
    colour = "black", fontface = "bold", size = size_text
  ) +
  # one tick per council size observed in 2011
  scale_x_continuous(
    breaks = bymun %>%
      filter(year == 2011) %>%
      pull(concejoSize) %>%
      unique() %>%
      sort()
  ) +
  labs(
    x = "number of council seats in 2011",
    y = "% of municipalities\n(N = 135)"
  )
)


## (2.3.5.2) Distribution of margins and dependent variables (general - primary) ----

# preparing the dataset: long format with one row per municipality-year-variable;
# the election (primary / general / difference) and the outcome are both
# recovered from the original column name
bymun_dist <- bymun %>%
  select(
    municipio, year,
    # primary election
    margin_12_pct_classif_paso, margin_23_pct_classif_paso,
    turnout_pct_paso, positive_pct_paso,
    sum_first2_pct_classif_paso, partido_golosov_classif_paso,
    # general election
    margin_12_pct_general, margin_23_pct_general,
    turnout_pct_general, positive_pct_general,
    sum_first2_pct_general, partido_golosov_general,
    # difference (delta)
    margin_12_pct_delta, margin_23_pct_delta,
    turnout_pct_delta, positive_pct_delta,
    sum_first2_pct_delta, partido_golosov_delta
  ) %>%
  pivot_longer(
    cols = margin_12_pct_classif_paso:partido_golosov_delta,
    names_to = "varname",
    values_to = "value"
  ) %>%
  mutate(
    election = factor(
      case_when(
        grepl("general", varname) ~ "general election",
        grepl("paso", varname) ~ "primary election",
        grepl("delta", varname) ~ "difference:\ngeneral - primary"
      ),
      levels = c(
        "primary election", "general election", "difference:\ngeneral - primary"
      )
    ),
    variable = factor(
      case_when(
        grepl("margin_12", varname) ~ "margin: 1 vs 2 (%)",
        grepl("margin_23", varname) ~ "margin: 2 vs 3 (%)",
        grepl("turnout", varname) ~ "turnout (%)",
        grepl("positive", varname) ~ "positive votes (%)",
        grepl("sum_first2", varname) ~ "votes first + second (%)",
        grepl("golosov", varname) ~ "Golosov index"
      ),
      levels = c(
        "margin: 1 vs 2 (%)", "margin: 2 vs 3 (%)", "turnout (%)",
        "positive votes (%)", "votes first + second (%)", "Golosov index"
      )
    )
  )
summary(bymun_dist)

# drawing the plot: overlaid densities of each outcome in the primary, in the
# general, and of their difference; one panel per outcome
(p_density_levels <- ggplot(
  data = bymun_dist,
  mapping = aes(x = value, fill = election)
) +
  geom_vline(xintercept = 0, linetype = 2, colour = col_concurrent) +
  geom_density(alpha = alpha_values * 3.5) +
  scale_fill_manual(
    name = "election",
    values = c(col_paso, col_gen, wes_palette(col_movie)[5])
  ) +
  facet_wrap(~variable, ncol = 2, scales = "free") +
  labs(x = "") +
  theme(
    legend.position = "right",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)



### (2.3.6) Scatterplots: Relationship between X and Y ----

## (2.3.6.1) Main: concurrent vs. midterm elections (8 plots in total) ----

## delta: general (%) - primary (%)

# variables' range (used to choose the axis limits of the plots below)
bymun %>%
  select(
    margin_12_pct_paso,
    turnout_pct_delta,
    positive_pct_delta,
    sum_first2_pct_delta,
    partido_golosov_delta
  ) %>%
  summary()

# turnout (%): change between primary and general vs. the primary's margin
# between the two largest parties, by election timing
(p_midt_turnout_delta <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_pct_paso, y = turnout_pct_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-7.5, 18), breaks = seq(-5, 15, by = 5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in turnout\n(% general - % primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# positive votes (%): change between primary and general vs. the primary's
# margin between the two largest parties, by election timing
(p_midt_positive_delta <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_pct_paso, y = positive_pct_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-7.5, 18), breaks = seq(-5, 15, by = 5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in positive votes\n(% general - % primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# vote of the top-2 parties (%): change between primary and general vs. the
# primary's margin between the two largest parties, by election timing
(p_midt_sum2_delta <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_pct_paso, y = sum_first2_pct_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-15.5, 26), breaks = seq(-15, 25, by = 5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in votes of top-2 parties\n(% general - % primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# Golosov index: change between primary and general vs. the primary's margin
# between the two largest parties, by election timing
(p_midt_golosov_delta <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_pct_paso, y = partido_golosov_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-3, 1.5), breaks = seq(-3, 1.5, by = 0.5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in Golosov index\n(general - primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)


## log (general/primary)

# variables' range (used to choose the axis limits of the plots below)
bymun %>%
  select(
    margin_12_log_paso,
    turnout_log_delta,
    positive_log_delta,
    sum_first2_log_delta,
    partido_golosov_log_delta
  ) %>%
  summary()

# turnout (log): log change between primary and general vs. the primary's log
# margin between the two largest parties, by election timing
(p_midt_turnout_log <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_log_paso, y = turnout_log_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 2.6), breaks = seq(0, 2.5, by = 0.5)) +
  scale_y_continuous(
    limits = c(-0.065, 0.26), breaks = seq(-0.05, 0.25, by = 0.05)
  ) +
  labs(
    x = "primary election: log[votes first] - log[votes second]",
    y = "change in turnout\n(log[general] - log[primary])"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# positive votes (log): log change between primary and general vs. the
# primary's log margin between the two largest parties, by election timing
(p_midt_positive_log <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_log_paso, y = positive_log_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 2.6), breaks = seq(0, 2.5, by = 0.5)) +
  scale_y_continuous(
    limits = c(-0.065, 0.26), breaks = seq(-0.05, 0.26, by = 0.05)
  ) +
  labs(
    x = "primary election: log[votes first] - log[votes second]",
    y = "change in positive votes\n(log[general] - log[primary])"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# vote of the top-2 parties (log): log change between primary and general vs.
# the primary's log margin between the two largest parties, by election timing
# (the y axis is left at its default limits)
(p_midt_sum2_log <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_log_paso, y = sum_first2_log_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 2.6), breaks = seq(0, 2.5, by = 0.5)) +
  labs(
    x = "primary election: log[votes first] - log[votes second]",
    y = "change in votes of top-2 parties\n(log[general] - log[primary])"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# Golosov index (log): log change between primary and general vs. the
# primary's log margin between the two largest parties, by election timing
# (the y axis is left at its default limits)
(p_midt_golosov_log <- ggplot(
  bymun,
  # 1 - midterm: 0 = midterm year, 1 = concurrent year (same order as the labels)
  aes(x = margin_12_log_paso, y = partido_golosov_log_delta, colour = factor(1 - midterm))
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE, linewidth = size_tsline / 2,
    colour = col_concurrent
  ) +
  # linear fits by election timing, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("midterm year", "concurrent year")
  ) +
  scale_x_continuous(limits = c(0, 2.6), breaks = seq(0, 2.5, by = 0.5)) +
  labs(
    x = "primary election: log[votes first] - log[votes second]",
    y = "change in Golosov index\n(log[general] - log[primary])"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)


## (2.3.6.2) Large vs. small municipalities (4 plots in total) ----

## identifying large vs. small municipalities (on the basis of the 2011 council size):
bymun %>%
  filter(year == 2011) %>%
  with(table(concejoSize, useNA = "always"))

# output of the table above (first row: council size; second row: municipalities)
# concejoSize
# 6   10   12   14   16   18   20   24 <NA> 
# 6   17   33   20   10   16   14   19    0

# municipalities per group (presumably small = up to 14 seats, large = 16 or
# more; confirm against the definition of muni_size)
sum(6, 17, 33, 20) ## 76
sum(10, 16, 14, 19) ## 59


## drawing the plots

# turnout (%): change between primary and general vs. the primary's margin
# between the two largest parties, by municipality size
(p_size_turnout_delta <- ggplot(
  data = bymun,
  mapping = aes(x = margin_12_pct_paso, y = turnout_pct_delta, colour = muni_size)
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE,
    linewidth = size_tsline / 2, colour = col_concurrent
  ) +
  # linear fits by municipality size, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("small municipality", "large municipality")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-7.5, 18), breaks = seq(-5, 15, by = 5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in turnout\n(% general - % primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# positive votes (%): change between primary and general vs. the primary's
# margin between the two largest parties, by municipality size
(p_size_positive_delta <- ggplot(
  data = bymun,
  mapping = aes(x = margin_12_pct_paso, y = positive_pct_delta, colour = muni_size)
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE,
    linewidth = size_tsline / 2, colour = col_concurrent
  ) +
  # linear fits by municipality size, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("small municipality", "large municipality")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-7.5, 18), breaks = seq(-5, 15, by = 5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in positive votes\n(% general - % primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# vote of the top-2 parties (%): change between primary and general vs. the
# primary's margin between the two largest parties, by municipality size
(p_size_sum2_delta <- ggplot(
  data = bymun,
  mapping = aes(x = margin_12_pct_paso, y = sum_first2_pct_delta, colour = muni_size)
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE,
    linewidth = size_tsline / 2, colour = col_concurrent
  ) +
  # linear fits by municipality size, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("small municipality", "large municipality")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-15.5, 26), breaks = seq(-15, 25, by = 5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in votes of top-2 parties\n(% general - % primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)

# Golosov index: change between primary and general vs. the primary's margin
# between the two largest parties, by municipality size
(p_size_golosov_delta <- ggplot(
  data = bymun,
  mapping = aes(x = margin_12_pct_paso, y = partido_golosov_delta, colour = muni_size)
) +
  geom_hline(yintercept = 0, linetype = 2, colour = col_concurrent) +
  # linear fit in a constant colour (intended as the fit for all observations)
  geom_smooth(
    method = "lm", se = FALSE,
    linewidth = size_tsline / 2, colour = col_concurrent
  ) +
  # linear fits by municipality size, with their equations
  geom_smooth(method = "lm", se = FALSE, linewidth = size_tsline / 2) +
  stat_poly_eq(
    use_label("eq"),
    coef.digits = 2, size = size_text * 1.1,
    label.x = "right", label.y = "top"
  ) +
  geom_point(alpha = alpha_values * 4) +
  scale_colour_manual(
    name = "",
    values = c(col_paso, col_gen),
    labels = c("small municipality", "large municipality")
  ) +
  scale_x_continuous(limits = c(0, 72), breaks = seq(0, 72, by = 10)) +
  scale_y_continuous(limits = c(-3, 1.5), breaks = seq(-3, 1.5, by = 0.5)) +
  labs(
    x = "primary election: margin 1 vs. 2 (%)",
    y = "change in Golosov index\n(general - primary)"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box.margin = margin(-15, -9, -9, -9)
  )
)



### (2.3.7) Google Trends data ----

## (2.3.7.1) importing the data

# PASO dates, one per election year from 2011 to 2023, with the year as a
# separate column
(paso_dates <- data.frame(
  paso_date = as_date(c(
    "2011-08-14",
    "2013-08-11",
    "2015-08-09",
    "2017-08-13",
    "2019-08-11",
    "2021-09-12",
    "2023-08-13"
  ))
) %>%
  mutate(year = year(paso_date)))

# file names
# Note: the `pattern` argument of list.files () is a regular expression, not a
# shell glob, so dots are escaped and the match is anchored at the end of the name.

# yearly trend files: names ending in "<year>.csv" for the election years 2011, 2013, ..., 2023
(files_trends <- list.files (
  path = "trends"
  , pattern = str_c ("(", str_c (seq (2011, 2023, by=2), collapse = "|"), ")\\.csv$")
  , full.names = TRUE))

# query files: names ending in "pasoWeek.csv"
# (formerly the glob "*pasoWeek.csv", whose leading "*" and unescaped "." are
# not valid/intended regex syntax)
(files_queries <- list.files (
  path = "trends", pattern = "pasoWeek\\.csv$", full.names = TRUE))

# putting everything into a single dataset: one row per week x office, with
# the relative popularity of each search term
gtrends <- map_dfr(
  files_trends,
  function(path) {
    # everything is read as character; the first two lines of each file are skipped
    read_csv(path, skip = 2, col_types = cols(.default = "c")) %>%
      rename_all(tolower) %>%
      # the year is taken from the first four characters of the file name
      mutate(year = as.numeric(str_sub(basename(path), 1, 4)))
  }
) %>%
  pivot_longer(
    cols = `intendente: (buenos aires province)`:`presidente: (buenos aires province)`,
    names_to = "office",
    values_to = "popularity"
  ) %>%
  mutate(
    week = ymd(week),
    # values reported as "<1" are set to 0.5 before converting to numeric
    popularity = as.numeric(case_when(
      popularity == "<1" ~ "0.5",
      TRUE ~ popularity
    )),
    office = factor(
      case_when(
        grepl("presidente", office) ~ "president",
        grepl("gobernador", office) ~ "governor",
        grepl("intendente", office) ~ "mayor",
        grepl("concejal", office) ~ "councilor"
      ),
      levels = c("president", "governor", "mayor", "councilor")
    )
  )
summary(gtrends)

# search queries: the first 10 rows of each query file, flagged as
# politics-related or not and ordered by popularity within each year
gqueries <- map_dfr(
  files_queries,
  function(path) {
    read_csv(path, skip = 1, n_max = 10, col_types = cols(.default = "c")) %>%
      rename("term" = "TOP") %>%
      # each cell holds "term,popularity"
      separate_wider_delim(
        cols = term, delim = ",", names = c("term", "popularity")
      ) %>%
      mutate(year = basename(path))
  }
) %>%
  mutate(
    popularity = as.numeric(popularity),
    # what is left of the file name after removing the two fixed parts is the year
    year = as.numeric(
      str_replace_all(year, c("queries" = "", "pasoWeek.csv" = ""))
    ),
    politics = factor(
      if_else(
        grepl("elecciones|primarias|argentina|resultados|padron|paso|voto|dolar|milei|buenos aires|gobierno|cristina|macri|scioli|alberto|massa", term),
        "yes",
        "no"
      ),
      levels = c("yes", "no")
    )
  ) %>%
  # to reorder the terms within years
  group_by(year) %>%
  mutate(term = reorder_within(term, popularity, year)) %>%
  ungroup() ## warnings are reported here; they can be ignored
summary(gqueries)


## (2.3.7.2) drawing the plots
# weekly relative popularity of the four offices in Google searches, one panel
# per year; the dashed vertical line marks the date of the PASO
(p_google_trends <- ggplot(
  data = gtrends,
  mapping = aes(x = week, y = popularity, colour = office)
) +
  geom_line() +
  geom_vline(
    data = paso_dates,
    mapping = aes(xintercept = paso_date),
    linetype = "dashed", colour = "black"
  ) +
  scale_colour_manual(
    name = "",
    values = c(
      wes_palette(col_movie)[4], # president
      wes_palette(col_movie)[5], # governor
      wes_palette(col_movie)[2], # mayor
      wes_palette(col_movie)[1] # councilor
    )
  ) +
  scale_x_date(
    name = "",
    date_breaks = "3 months", # one tick every 3 months
    date_labels = "%b" # abbreviated month name only
  ) +
  facet_wrap(~year, ncol = 2, scales = "free_x") +
  labs(y = "relative popularity in Google searches (%)") +
  theme(
    legend.position = "right",
    legend.title = element_blank(),
    legend.box.margin = margin(-24, -9, -9, -9)
  )
)

# search terms by relative popularity, one panel per year; the fill shows
# whether the term is flagged as politics-related
(p_google_queries <- ggplot(
  data = gqueries,
  mapping = aes(x = popularity, y = term, fill = politics)
) +
  geom_col() +
  scale_fill_manual(
    name = "election\nrelated",
    values = c(col_gen, col_paso)
  ) +
  # strip the "___<year>" suffix that reorder_within () appends to each term
  scale_y_discrete(
    name = "",
    labels = function(x) gsub("___.*", "", x)
  ) +
  facet_wrap(~year, ncol = 2, scales = "free_y") +
  labs(x = "relative popularity in Google searches (%)") +
  theme(
    legend.position = "right",
    legend.box.margin = margin(-9, -9, -9, -9)
  )
)



### (2.3.8) Correlations between variables measured at the local, provincial and national level ----

## (2.3.8.1) preparing the data
bymun_correl <- bymun %>% 
  select (
    municipio, year
    , turnout_pct_paso, positive_pct_paso, sum_first2_pct_classif_paso, partido_golosov_classif_paso, margin_12_pct_classif_paso, margin_23_pct_classif_paso
    , turnout_pct_general, positive_pct_general, sum_first2_pct_general, partido_golosov_general, margin_12_pct_general, margin_23_pct_general) %>% 
  mutate (level = "local0") %>% 
  bind_rows ( ## adding the data from mesas
    byparty_mesa %>% 
      rename (
        year = elYear
        , municipio = muni
        , sum_first2_pct_classif = votes_top2_classif
        , partido_golosov_classif = golosov_classif
        , margin_12_pct_classif = margin12_classif
        , margin_23_pct_classif = margin23_classif) %>% 
      select (municipio, year, level, type, turnout_pct, positive_pct, sum_first2_pct_classif, partido_golosov_classif, margin_12_pct_classif, margin_23_pct_classif) %>% 
      unique () %>% 
      pivot_wider (
        id_cols = municipio:level
        , names_from = "type"
        , values_from = turnout_pct:margin_23_pct_classif) %>% 
      mutate (level = as.character (level)) %>% 
      rename_with (~str_replace( .x, "_classif_general", "_general"))) %>%
  rename_with (~str_replace (.x, "_classif_paso", "_paso")) %>%
  pivot_longer ( ## a factor for each variable
    cols = c (turnout_pct_paso:margin_23_pct_general)
    , names_to = c ("variable", ".value")
    , names_pattern = "(.+)_(paso|general)") %>% 
  mutate (
    level = level %>% factor (levels=c ("local0", "local", "provincial", "national"))
    , variable = case_when (
      variable %in% c ("turnout_pct") ~ "% turnout"
      , variable %in% c ("positive_pct") ~ "% positive"
      , variable %in% c ("sum_first2_pct") ~ "% first two"
      , variable %in% c ("partido_golosov") ~ "Golosov"
      , variable %in% c ("margin_12_pct") ~ "margin: 1 vs 2"
      , variable %in% c ("margin_23_pct") ~ "margin: 2 vs 3") %>% 
      factor (levels=c ("% turnout", "% positive", "% first two", "Golosov", "margin: 1 vs 2", "margin: 2 vs 3"))) %>% 
  arrange (level) %>% 
  group_by (municipio, level, variable) %>% ## demeaning by municipality
  mutate (
    paso = paso - mean (paso, na.rm=TRUE)
    , general = general - mean (general, na.rm=TRUE)) %>% 
  ungroup () %>% ## separate variables for each level
  pivot_wider (
    id_cols = c (municipio, year, variable)
    , names_from = "level"
    , values_from = c ("paso", "general")) %>% ## getting the correlations
  select (-municipio, -year) %>%
  group_by (variable) %>%
  group_modify (~ {
    cor (.x, use = "complete.obs") %>%
      as.data.frame () %>%
      rownames_to_column ("var1") %>%
      pivot_longer (-var1, names_to = "var2", values_to = "correlation")
  }) %>% 
  mutate ( ## changing names and re-factorizing
    var1 = case_when (
      var1 == "paso_local0" ~ "Primary: municipal\n(definitive)"
      , var1 == "paso_local" ~ "Primary: municipal\n(provisional)"
      , var1 == "paso_provincial" ~ "Primary: provincial"
      , var1 == "paso_national" ~ "Primary: national"
      , var1 == "general_local0" ~ "General: municipal\n(definitive)"
      , var1 == "general_local" ~ "General: municipal\n(provisional)"
      , var1 == "general_provincial" ~ "General: provincial"
      , var1 == "general_national" ~ "General: national") %>% 
      factor (levels = c (
        "Primary: municipal\n(definitive)", "Primary: municipal\n(provisional)"
        , "Primary: provincial", "Primary: national"
        , "General: municipal\n(definitive)", "General: municipal\n(provisional)"
        , "General: provincial", "General: national"))
    , var2 = case_when (
      var2 == "paso_local0" ~ "Primary: municipal\n(definitive)"
      , var2 == "paso_local" ~ "Primary: municipal\n(provisional)"
      , var2 == "paso_provincial" ~ "Primary: provincial"
      , var2 == "paso_national" ~ "Primary: national"
      , var2 == "general_local0" ~ "General: municipal\n(definitive)"
      , var2 == "general_local" ~ "General: municipal\n(provisional)"
      , var2 == "general_provincial" ~ "General: provincial"
      , var2 == "general_national" ~ "General: national") %>% 
      factor (levels = c ( ## these go in reverse order
        "General: national", "General: provincial"
        , "General: municipal\n(provisional)", "General: municipal\n(definitive)"
        , "Primary: national", "Primary: provincial"
        , "Primary: municipal\n(provisional)", "Primary: municipal\n(definitive)")))
summary (bymun_correl)


## (2.3.8.2) drawing the plots
# Tile plot of the pairwise correlations stored in bymun_correl: var1 on the
# x-axis, var2 on the y-axis, one facet per variable. Each tile is filled
# according to the correlation (diverging scale centred at 0 and bounded at
# [-1, 1]) and labelled with its value rounded to two decimals. The outer
# parentheses print the plot as it is assigned.
(p_correl_levels <- ggplot (
  data = bymun_correl
  , mapping = aes (x = var1, y = var2, fill = correlation)) +
  geom_tile () +
  geom_text (
    mapping = aes (label = sprintf ("%.2f", correlation))
    , size = 0.9 * size_text
    , color = "black") +
  scale_fill_gradient2 (
    limits = c (-1, 1)
    , midpoint = 0
    , low = col_paso
    , mid = "white"
    , high = col_gen) +
  labs (x = "", y = "") +
  facet_wrap (~variable, ncol = 2) +
  theme (axis.text.x = element_text (angle = 45, hjust = 1)))





#################################################
#################################################
########## (3) Closeness and outcomes ###########
#################################################
#################################################

###########################################################
##### (3.1) Estimating the models: Main specifications ----
###########################################################

### (3.1.1) Preparing the data ----
## Reshapes `bymun` into the long format used by all models in section (3):
## one row per original row x outcome x predictor. For each row it then derives
##   - descriptors of the outcome (measure / scale / timing) and of the predictor
##     (margin / scale / denominator), all parsed from the column names;
##   - `out_lag`, the primary-election (PASO) counterpart of the outcome;
##   - `pred_outcome`, a label identifying the valid predictor-outcome pairs
##     (rows whose label is NA are dropped);
##   - demeaned and standardized versions of outcome, predictor and lag, computed
##     within municipality and predictor-outcome pair.
## NOTE(review): `pred_outcome` is built with factor(), so its levels are sorted
## alphabetically; the tables in (3.3) pick models by position (e.g.
## `sample=c (12, 8, 10, 6)`), which presumably indexes these levels. Renaming or
## adding outcomes/predictors here would shift those positions -- confirm before changing.
## NOTE(review): the pipeline ends without ungroup(), so `bymun_estim` remains
## grouped by (municipio, pred_outcome) for everything downstream.
bymun_estim <- bymun %>% 
  pivot_longer ( ## pivoting longer by outcome
    cols = c (
      turnout_pct_general, turnout_pct_delta, turnout_log_general
      , positive_pct_general, positive_pct_delta, positive_log_general
      , sum_first2_pct_general, sum_first2_pct_delta, sum_first2_log_general
      , partido_golosov_general, partido_golosov_delta, partido_golosov_log_general)
    , names_to = "outcome"
    , values_to = "out_value") %>% 
  pivot_longer ( ## now by predictor
    cols = c (
      margin_12_pct_classif_paso, margin_12_log_paso, margin_12_pct_paso)
    , names_to = "predictor"
    , values_to = "pred_value") %>% 
  mutate (
    
    ## outcome
    ## (measure, scale and timing are all recovered from the outcome's column name)
    out_measure = case_when (
      grepl ("turnout", outcome) ~ "turnout"
      , grepl ("positive", outcome) ~ "positive"
      , grepl ("sum_first2", outcome) ~ "first2"
      , grepl ("golosov", outcome) ~ "golosov")
    , out_measure = factor (out_measure, levels=c (
      "turnout", "positive", "first2", "golosov"))
    , out_scale = case_when (
      grepl ("_log", outcome) ~ "logs"
      , TRUE ~ "levels") %>% factor ()
    , out_timing = case_when (
      grepl ("_general", outcome) ~ "general"
      , grepl ("_delta", outcome) ~ "delta")
    , out_timing = factor (out_timing, levels=c ("general", "delta"))
    , outcome = factor (outcome)
    
    ## predictor
    ## (all three predictors pivoted above contain "_12", so "2 vs 3" is never assigned here)
    , pred_margin = case_when (
      grepl ("_12", predictor) ~ "1 vs 2"
      , grepl ("_23", predictor) ~ "2 vs 3") %>% factor ()
    , pred_scale = case_when (
      grepl ("_log", predictor) ~ "logs"
      , TRUE ~ "levels") %>% factor ()
    , pred_denominator = case_when (
      predictor == "margin_12_pct_classif_paso" ~ "qualified"
      , TRUE ~ "all" ## includes logs
      ) %>% factor ()
    , predictor = factor (predictor)
    
    ## control -> value of outcome in PASO
    ## (for first2 and golosov in levels, the lag matches the predictor's denominator:
    ## the "_classif" column when the predictor is "qualified", the plain one otherwise)
    ## NOTE(review): in logs, golosov uses the "_classif" lag while first2 uses the
    ## non-"_classif" one -- confirm this asymmetry is intended.
    , out_lag = case_when (
      out_measure == "turnout" & out_scale == "levels" ~ turnout_pct_paso
      , out_measure == "positive" & out_scale == "levels" ~ positive_pct_paso
      , out_measure == "first2" & out_scale == "levels" & pred_denominator == "qualified" ~ sum_first2_pct_classif_paso
      , out_measure == "golosov" & out_scale == "levels"  & pred_denominator == "qualified" ~ partido_golosov_classif_paso
      , out_measure == "first2" & out_scale == "levels" & pred_denominator == "all" ~ sum_first2_pct_paso
      , out_measure == "golosov" & out_scale == "levels"  & pred_denominator == "all" ~ partido_golosov_paso
      , out_measure == "turnout" & out_scale == "logs" ~ turnout_log_paso
      , out_measure == "positive" & out_scale == "logs" ~ positive_log_paso
      , out_measure == "first2" & out_scale == "logs" ~ sum_first2_log_paso
      , out_measure == "golosov" & out_scale == "logs" ~ partido_golosov_log_classif_paso)
    
    ## pred-outcome combination -> not all outcomes go with all predictors (e.g. log-log specifications)
    ## (a pair is kept only when predictor and outcome are on the same scale)
    ## NOTE(review): "partido_2" is not among the levels of `out_measure` defined above and
    ## no "2 vs 3" predictor is pivoted, so with the current inputs only the first branch
    ## can match; the second looks like a leftover from an earlier specification.
    , pred_outcome = case_when (
      pred_scale == out_scale & pred_margin == "1 vs 2" & out_measure != "partido_2" ~ str_c (predictor, outcome, sep=" -> ")
      , pred_scale == out_scale & pred_margin == "2 vs 3" & out_measure == "partido_2" ~ str_c (predictor, outcome, sep=" -> ")) %>% 
      factor ()) %>% 
  filter (!is.na (pred_outcome)) %>% 
  group_by (municipio, pred_outcome) %>% ## standardized versions of the variables
  ## (mean() and sd() are called without na.rm, so a single NA within a
  ## municipality-pair group makes every derived value in that group NA)
  mutate (
    n = n () ## should be 7 for all values
    , out_value_desv = out_value - mean (out_value)
    , out_value_sd = sd (out_value_desv)
    , out_value_std = out_value_desv/out_value_sd
    , pred_value_desv = pred_value - mean (pred_value)
    , pred_value_sd = sd (pred_value_desv)
    , pred_value_std = pred_value_desv/pred_value_sd
    , out_lag_desv = out_lag - mean (out_lag)
    , out_lag_sd = sd (out_lag_desv)
    , out_lag_std = out_lag_desv/out_lag_sd)
summary (bymun_estim)



### (3.1.2) Main specification: single predictor, perhaps with controls ----
## One set of models per predictor-outcome pair (split). Within each pair,
## sw0 () estimates the model without and with the lagged outcome, and csw0 ()
## adds the fixed effects cumulatively (none; municipality; municipality + year).
## Standard errors are clustered by municipality.

# variables in their original scale
mod_bymun_full <- feols (
  fml = out_value ~ pred_value + sw0 (out_lag) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)

# same specification, using the within-municipality standardized variables
mod_bymun_full_std <- feols (
  fml = out_value_std ~ pred_value_std + sw0 (out_lag_std) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)


### (3.1.3) Midterm vs. concurrent ----
## Two parametrizations of the same heterogeneity, for raw and standardized variables:
##   - i(midterm, .) reports one slope of the predictor per value of `midterm`
##     (the marginal effects shown in the tables);
##   - predictor + predictor:midterm reports the baseline slope plus the difference
##     between the two, whose p-value tests whether the marginal effects differ.
## As in (3.1.2), sw0 () toggles the lagged outcome and csw0 () adds fixed effects
## cumulatively; one set of models is estimated per predictor-outcome pair.
mod_bymun_midt <- feols (
  out_value ~ i(midterm, pred_value) + sw0 (out_lag) | csw0 (municipio, year)
  , cluster = ~municipio, data=bymun_estim, split=~pred_outcome)

mod_bymun_midt_pval <- feols ( ## to get the p-values of the difference between coefficients
  out_value ~ pred_value + pred_value:midterm + sw0 (out_lag) | csw0 (municipio, year)
  , cluster = ~municipio, data=bymun_estim, split=~pred_outcome)

mod_bymun_midt_std <- feols (
  out_value_std ~ i(midterm, pred_value_std) + sw0 (out_lag_std) | csw0 (municipio, year)
  , cluster = ~municipio, data=bymun_estim, split=~pred_outcome)

## The control must be the standardized lag (out_lag_std), exactly as in
## mod_bymun_midt_std above and in mod_bymun_conu_std_pval below; with the raw
## out_lag the p-value would come from a different specification than the
## standardized model whose coefficients it is meant to compare.
mod_bymun_midt_std_pval <- feols ( ## ditto
  out_value_std ~ pred_value_std + pred_value_std:midterm + sw0 (out_lag_std) | csw0 (municipio, year)
  , cluster = ~municipio, data=bymun_estim, split=~pred_outcome)


### (3.1.4) Conurbano vs. Interior ----
## Same design as the midterm models, with `conurbano` as the moderator:
## the i() versions give one slope per category, while the "_pval" versions
## give the baseline slope plus its difference across categories.

# marginal effects, original scale
mod_bymun_conu <- feols (
  fml = out_value ~ i(conurbano, pred_value) + sw0 (out_lag) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)

# difference between the marginal effects, original scale
mod_bymun_conu_pval <- feols (
  fml = out_value ~ pred_value + pred_value:conurbano + sw0 (out_lag) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)

# marginal effects, standardized variables
mod_bymun_conu_std <- feols (
  fml = out_value_std ~ i(conurbano, pred_value_std) + sw0 (out_lag_std) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)

# difference between the marginal effects, standardized variables
mod_bymun_conu_std_pval <- feols (
  fml = out_value_std ~ pred_value_std + pred_value_std:conurbano + sw0 (out_lag_std) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)


### (3.1.5) Council size (in 2011) ----
## One slope of the predictor per value of `concejoSize_2011`.

# original scale
mod_bymun_size <- feols (
  fml = out_value ~ i(concejoSize_2011, pred_value) + sw0 (out_lag) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)

# standardized variables
mod_bymun_size_std <- feols (
  fml = out_value_std ~ i(concejoSize_2011, pred_value_std) + sw0 (out_lag_std) | csw0 (municipio, year)
  , data = bymun_estim
  , split = ~pred_outcome
  , cluster = ~municipio)

# at reviewer's request: midterm elections X size only
# (levels, general-election outcomes, "qualified" denominator; both fixed
# effects always included; one set of models per outcome measure)
mod_bymun_midt_size <- feols (
  fml = out_value ~ i(concejoSize_2011, pred_value) + sw0 (out_lag) | municipio + year
  , data = bymun_estim %>% 
    filter (
      midterm == 1 &
        out_scale == "levels" &
        out_timing == "general" &
        pred_denominator == "qualified")
  , split = ~out_measure
  , cluster = ~municipio)




####################################################
##### (3.2) Additional & complementary analyses ----
####################################################

### (3.2.1) "Horse race" between local, provincial and national-level results

## (3.2.1.1) Preparing the data ("horse race")
## Builds, from `byparty_mesa`, a dataset with one row per election year x
## municipality x outcome x level of the outcome, holding
##   - `out_value`: the outcome in the general election;
##   - `outcome_paso`: the same outcome in the primary (PASO);
##   - `margin12_<level>_<type>`: the 1-vs-2 margin at each level and election type
##     (the models below use the three `_paso` ones as predictors).
## NOTE(review): the column ranges used in select () and in the pivots
## (`level:muni`, `blank:COMANDO`, `votes_1_pct:margin23`, `elYear:outcome`,
## `margin12_national:outcome_value_local`, ...) depend on the column order of
## `byparty_mesa` and on the order in which the levels/types appear in the data --
## verify them if the input changes.
bymun_horse <- byparty_mesa %>%
  select (level:muni, registered, turnout, positive, blank:COMANDO, votes_1:votes_3, turnout_pct, positive_pct, votes_1_pct:margin23, golosov) %>%
  unique () %>% ## dropping duplicated rows after removing the other columns
  pivot_longer ( ## one row per outcome
    cols = c (turnout_pct, positive_pct, votes_top2, golosov)
    , names_to = "outcome"
    , values_to = "outcome_value") %>% 
  mutate (
    outcome = factor (outcome, levels=c (
      "turnout_pct", "positive_pct", "votes_top2", "golosov"))) %>% 
  pivot_wider ( ## separate margin and outcome columns for each level
    id_cols = c (type, elYear, muni_id, muni, outcome)
    , names_from = "level"
    , values_from = c (margin12, outcome_value)) %>% 
  pivot_wider ( ## ... and for each election type (suffix added to the column names)
    id_cols = c (elYear:outcome)
    , names_from = "type"
    , values_from = margin12_national:outcome_value_local) %>% 
  pivot_longer ( ## back to long by level of the outcome, keeping general and paso as two columns
    cols = outcome_value_national_general:outcome_value_local_paso
    , names_to = c ("outcome_level", ".value")
    , names_pattern = "outcome_value_(.+)_(general|paso)") %>%
  rename (out_value = general, outcome_paso = paso) %>% 
  mutate (
    outcome_level = factor (outcome_level, levels = c ("local", "provincial", "national")))
summary (bymun_horse)


## (3.2.1.2) Estimating the models

# the three primary margins (local, provincial, national) entered jointly;
# one set of models per outcome x level of the outcome, with municipality and
# year fixed effects and standard errors clustered by municipality
mod_bymun_horse <- feols (
  fml = out_value ~ margin12_local_paso + margin12_provincial_paso + margin12_national_paso + sw0 (outcome_paso) | muni + elYear
  , data = bymun_horse
  , split = ~interaction (outcome, outcome_level)
  , cluster = ~muni)

# main specification re-estimated on the post-2011 elections only
# (we lack precinct-level data for the 2011 PASO)
mod_bymun_full_horse <- feols (
  fml = out_value ~ pred_value + sw0 (out_lag) | municipio + year
  , data = bymun_estim %>% 
    filter (
      year > 2011
      , predictor == "margin_12_pct_classif_paso"
      , out_scale == "levels"
      , out_timing == "general")
  , split = ~out_measure
  , cluster = ~municipio)



### (3.2.2) Internal divisions between first and second-placed parties

## (3.2.2.1) Competitive 1 vs 2: preparing the data

# variables to add
# `add_primary` holds one row per municipality-year with, for each of the two
# top-ranked parties in the PASO (suffixes _1 and _2):
#   - the primary-competition variables taken from `byparty_full`;
#   - the vote share of the party's factions, computed as the party's share
#     times the faction's share within the party;
#   - two classifications of which of the two parties held a contested primary.
# NOTE(review): pivot_wider () assumes a single row per municipality-year-rank
# in `byparty_full` after the filter; otherwise it would return list-columns.
add_primary <- byparty_full %>% 
  filter ( ## top-2 parties (in PASO) only
    year >= 2011 & rank_partido_classif_paso <= 2) %>% 
  select ( ## variables of interest (to add to main dataset later)
    municipio, year, rank_partido_classif_paso, votos_partido_pct_classif_paso
    , primary_comp, n_faccions, votos_faccion_pct_party, faccion_mg) %>% 
  mutate (
    ## NOTE(review): presumably `votos_faccion_pct_party` is the leading faction's share
    ## within the party and `faccion_mg` its margin over the runner-up faction, so that
    ## "faccion2" is the runner-up's share -- confirm against the codebook.
    votos_faccion1_pct_classif = votos_partido_pct_classif_paso * (votos_faccion_pct_party/100)
    , votos_faccion2_pct_classif = votos_partido_pct_classif_paso * ((votos_faccion_pct_party - faccion_mg)/100)) %>% 
  arrange (rank_partido_classif_paso) %>% ## so that the "_1" columns are created before the "_2" ones
  pivot_wider (
    id_cols = c (municipio, year)
    , names_from = "rank_partido_classif_paso"
    , values_from = c (primary_comp:votos_faccion2_pct_classif)) %>% ## range relies on the column order set by select () and mutate () above
  mutate (
    primary_comp_configuration = case_when ( ## competitive vs not competitive
      primary_comp_1 == 0 & primary_comp_2 == 0 ~ "neither"
      , primary_comp_1 == 1 & primary_comp_2 == 0 ~ "first"
      , primary_comp_1 == 0 & primary_comp_2 == 1 ~ "second"
      , primary_comp_1 == 1 & primary_comp_2 == 1 ~ "both") %>% 
      factor (levels=c ("neither", "first", "second", "both"))
    
    ## alternative criterion: a primary counts as contested when the margin is <= 25;
    ## a missing margin is set to 100, i.e. treated as not contested
    , faccion_mg_1 = if_else (is.na (faccion_mg_1), 100, faccion_mg_1)
    , faccion_mg_2 = if_else (is.na (faccion_mg_2), 100, faccion_mg_2)
    , primary_comp_configuration25 = case_when (
      faccion_mg_1 > 25 & faccion_mg_2 > 25 ~ "neither"
      , faccion_mg_1 <= 25 & faccion_mg_2 > 25 ~ "first"
      , faccion_mg_1 > 25 & faccion_mg_2 <= 25 ~ "second"
      , faccion_mg_1 <= 25 & faccion_mg_2 <= 25 ~ "both") %>% 
      factor (levels=c ("neither", "first", "second", "both")))

# adding to main dataset
# The two classifications of contested primaries are stacked, so that every
# observation appears once per criterion ("simple" and "margin: 25"); only the
# rows of the main levels specification (qualified-parties margin, outcome
# measured in levels in the general election) are retained.
bymun_estim_primary <- bymun_estim %>% 
  left_join (add_primary, by = c ("municipio", "year")) %>% 
  pivot_longer (
    cols = primary_comp_configuration:primary_comp_configuration25
    , names_to = "contested_criterion"
    , values_to = "primary_comp_configuration") %>% 
  mutate (
    contested_criterion = factor (
      case_when (
        contested_criterion == "primary_comp_configuration" ~ "simple"
        , contested_criterion == "primary_comp_configuration25" ~ "margin: 25")
      , levels = c ("simple", "margin: 25"))) %>% 
  filter (
    predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
summary (bymun_estim_primary)


## (3.2.2.2) Competitive 1 vs 2: estimating the models
# one slope of the primary margin per configuration of contested primaries;
# a separate set of models for each outcome measure x criterion
mod_bymun_primary <- feols (
  fml = out_value ~ i(primary_comp_configuration, pred_value) + sw0 (out_lag) | municipio + year
  , data = bymun_estim_primary
  , split = ~interaction (out_measure, contested_criterion)
  , cluster = ~municipio)


## (3.2.2.3) Largest list only: preparing the data
# Adds to the main dataset the "faccion1" vote share of the two top-ranked
# parties and the margin between them, signed and in absolute value.
bymun_estim <- bymun_estim %>% 
  left_join (
    add_primary %>% 
      select (
        municipio, year
        , votos_faccion1_pct_classif_1, votos_faccion1_pct_classif_2)
    , by = c ("municipio", "year")) %>% 
  mutate (
    # margin of the top voted faction of the top voted parties in the primary
    pred_value_primary = votos_faccion1_pct_classif_1 - votos_faccion1_pct_classif_2
    , pred_value_primary_abs = abs (pred_value_primary))
summary (bymun_estim)


## (3.2.2.4) Largest list only: estimating the models
## These replicate the specifications of (3.1.2)-(3.1.5) with the absolute
## margin between the largest lists (pred_value_primary_abs) as the predictor.
## All of them use the same subsample (rows of the qualified-parties margin,
## outcomes in levels measured in the general election), always include
## municipality and year fixed effects, and cluster by municipality.

# Main specification: single predictor
mod_bymun_full_primary <- feols (
  fml = out_value ~ pred_value_primary_abs + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~pred_outcome
  , cluster = ~municipio)

# Midterm vs. concurrent: marginal effects
mod_bymun_midt_primary <- feols (
  fml = out_value ~ i(midterm, pred_value_primary_abs) + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~pred_outcome
  , cluster = ~municipio)

# Midterm vs. concurrent: difference between the marginal effects
mod_bymun_midt_primary_pval <- feols (
  fml = out_value ~ pred_value_primary_abs + pred_value_primary_abs:midterm + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~pred_outcome
  , cluster = ~municipio)

# Conurbano vs. Interior: marginal effects
mod_bymun_conu_primary <- feols (
  fml = out_value ~ i(conurbano, pred_value_primary_abs) + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~pred_outcome
  , cluster = ~municipio)

# Conurbano vs. Interior: difference between the marginal effects
mod_bymun_conu_primary_pval <- feols (
  fml = out_value ~ pred_value_primary_abs + pred_value_primary_abs:conurbano + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~pred_outcome
  , cluster = ~municipio)

# Council size (in 2011)
mod_bymun_size_primary <- feols (
  fml = out_value ~ i(concejoSize_2011, pred_value_primary_abs) + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~pred_outcome
  , cluster = ~municipio)




### (3.2.3) Distance to council majority ----

## (3.2.3.1) Preparing the dataset

# calculating distance to majority in the council
# `add_maj` ends up with one row per municipality-year (from 2011 onwards) with:
#   - the councilors of the PJ and the UCR in the general election, in the PASO,
#     and in the general election of the preceding year in the data (lag);
#   - the expected number of councilors after the PASO (lag + PASO) and its
#     distance to `concejoSize_nmaj`;
#   - three classifications of that distance ("near" when the distance is -1 or 0).
# NOTE(review): `concejoSize_nmaj` is presumably the number of seats needed for a
# council majority, and `concejales_paso` the seats implied by the PASO results --
# confirm against the codebook.
add_maj <- byparty_full %>% 
  mutate ( ## expected concejales PJ vs UCR
    ## (the value is kept only in the rows of the corresponding reference party; NA elsewhere)
    concejales_pj_general = if_else (
      !is.na (ref_party) & ref_party == "pj", concejales_general, NA)
    , concejales_pj_paso = if_else (
      !is.na (ref_party) & ref_party == "pj", concejales_paso, NA)
    , concejales_ucr_general = if_else (
      !is.na (ref_party) & ref_party == "ucr", concejales_general, NA)
    , concejales_ucr_paso = if_else (
      !is.na (ref_party) & ref_party == "ucr", concejales_paso, NA)) %>% 
  group_by ( ## lagged value from previous election
    municipio, ref_party) %>% 
  mutate ( ## computed before dropping the pre-2011 rows, so those years can still serve as lags
    concejales_pj_general_lag = lag (concejales_pj_general, n=1, order_by=year)
    , concejales_ucr_general_lag = lag (concejales_ucr_general, n=1, order_by=year)) %>% 
  ungroup () %>% 
  filter (year >= 2011) %>% 
  group_by (municipio, year) %>% 
  mutate (
    
    # getting a single value for each election
    # (max_na () returns the maximum of the non-missing values, or NA if all are missing)
    concejales_pj_general = max_na (concejales_pj_general)
    , concejales_pj_paso = max_na (concejales_pj_paso)
    , concejales_ucr_general = max_na (concejales_ucr_general)
    , concejales_ucr_paso = max_na (concejales_ucr_paso)
    , concejales_pj_general_lag = max_na (concejales_pj_general_lag)
    , concejales_ucr_general_lag = max_na (concejales_ucr_general_lag)
    
    # expected N of concejales after the PASO: existing ones + the N based on electoral results
    , concejales_pj_exp_paso = concejales_pj_general_lag + concejales_pj_paso
    , concejales_pj_maj_paso =  concejales_pj_exp_paso - concejoSize_nmaj
    
    , concejales_ucr_exp_paso = concejales_ucr_general_lag + concejales_ucr_paso
    , concejales_ucr_maj_paso =  concejales_ucr_exp_paso - concejoSize_nmaj
    
    # which party is evaluated: the one with rank 1 in the general election when
    # midterm == 0, the one flagged by inc_pj / inc_ucr when midterm == 1.
    # case_when () keeps the first match, so (i) each "near" branch must precede
    # its "far" counterpart, which acts as the fallback (it also catches a missing
    # distance), and (ii) the PJ branches take precedence over the UCR ones.
    , category_maj = case_when (
      midterm == 0 & rank_pj_general == 1 & concejales_pj_maj_paso %in% -1:0 ~ "pj near"
      , midterm == 0 & rank_pj_general == 1 ~ "pj far"
      , midterm == 1 & inc_pj == 1 & concejales_pj_maj_paso %in% -1:0 ~ "pj near"
      , midterm == 1 & inc_pj == 1~ "pj far"
      , midterm == 0 & rank_ucr_general == 1 & concejales_ucr_maj_paso %in% -1:0 ~ "ucr near"
      , midterm == 0 & rank_ucr_general == 1 ~ "ucr far"
      , midterm == 1 & inc_ucr == 1 & concejales_ucr_maj_paso %in% -1:0 ~ "ucr near"
      , midterm == 1 & inc_ucr == 1 ~ "ucr far"
      ) %>% factor ()
    # same classification, pooling both parties
    , category_maj_simp = case_when (
      category_maj %in% c ("pj near", "ucr near") ~ "inc near"
      , category_maj %in% c ("pj far", "ucr far") ~ "inc far") %>% 
      factor ()
    # ... and interacted with the electoral calendar
    , category_maj_cale = case_when (
      midterm == 0 ~ str_c ("concurrent: ", category_maj_simp, sep="")
      , midterm == 1 ~ str_c ("midterm: ", category_maj_simp, sep="")) %>% 
      factor (levels = c ("concurrent: inc near", "concurrent: inc far", "midterm: inc near", "midterm: inc far"))) %>% 
  ungroup () %>% 
  select (municipio, year, concejales_pj_general:category_maj_cale) %>% ## range relies on the new columns being appended at the end, in the order created above
  unique () ## collapsing the party-level rows into distinct municipality-year rows
summary (add_maj)

# adding to main dataset
# (the distance-to-majority variables are matched by municipality and year)
bymun_estim <- left_join (
  bymun_estim
  , add_maj
  , by = c ("municipio", "year"))
summary (bymun_estim)


## (3.2.3.2) Estimating the models
## Both models use the rows of the main levels specification (qualified-parties
## margin, outcomes in levels measured in the general election), include
## municipality and year fixed effects, and are estimated separately for each
## outcome measure, with standard errors clustered by municipality.

# one slope of the primary margin per distance category (near vs. far)
mod_bymun_maj <- feols (
  fml = out_value ~ i(category_maj_simp, pred_value) + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~out_measure
  , cluster = ~municipio)

# one slope per distance category x electoral calendar
mod_bymun_maj_cale <- feols (
  fml = out_value ~ i(category_maj_cale, pred_value) + sw0 (out_lag) | municipio + year
  , data = filter (
    bymun_estim
    , predictor == "margin_12_pct_classif_paso"
    , out_scale == "levels"
    , out_timing == "general")
  , split = ~out_measure
  , cluster = ~municipio)



### (3.2.4) Predicting outcome in G_t with P_t vs G_t-2 (in levels) ----

## (3.2.4.1) Preparing the dataset
# One row per municipality-year-outcome, holding the value of the outcome in
# the general election (out_value), its counterpart in the same year's primary
# (out_value_paso), and its values in the one and two preceding years available
# for that municipality (out_value_lag_t1, out_value_lag_t2). The lags are
# computed on the full series; only 2011-2023 is kept afterwards.
bymun_lag <- bymun_full %>% 
  pivot_longer ( # pivoting longer by outcome
    cols = c (
      margin_12_pct_general, margin_23_pct_general
      , turnout_pct_general, positive_pct_general
      , sum_first2_pct_general, partido_golosov_general)
    , names_to = "outcome"
    , values_to = "out_value") %>% 
  mutate (
    outcome = factor (
      outcome
      , levels = c (
        "margin_12_pct_general", "margin_23_pct_general"
        , "turnout_pct_general", "positive_pct_general"
        , "sum_first2_pct_general", "partido_golosov_general"))) %>% 
  mutate ( # value of the same outcome in the primary
    out_value_paso = case_when (
      outcome == "margin_12_pct_general" ~ margin_12_pct_classif_paso
      , outcome == "margin_23_pct_general" ~ margin_23_pct_classif_paso
      , outcome == "turnout_pct_general" ~ turnout_pct_paso
      , outcome == "positive_pct_general" ~ positive_pct_paso
      , outcome == "sum_first2_pct_general" ~ sum_first2_pct_classif_paso
      , outcome == "partido_golosov_general" ~ partido_golosov_classif_paso)) %>% 
  group_by (municipio, outcome) %>% 
  mutate ( # getting lagged values
    out_value_lag_t1 = lag (out_value, n = 1, order_by = year)
    , out_value_lag_t2 = lag (out_value, n = 2, order_by = year)) %>% 
  ungroup () %>% 
  filter (year %in% 2011:2023)
summary (bymun_lag)


## (3.2.4.2) Estimating the models
# mvsw () runs through the combinations of the three candidate predictors
# (primary value, first lag, second lag), csw0 () adds the fixed effects
# cumulatively; one set of models per outcome, clustered by municipality
mod_bymun_lag <- feols (
  fml = out_value ~ mvsw (out_value_paso, out_value_lag_t1, out_value_lag_t2) | csw0 (municipio, year)
  , data = bymun_lag
  , split = ~outcome
  , cluster = ~municipio)




####################################
##### (3.3) Building the tables ----
####################################

### (3.3.0) Common headers
## LaTeX snippets shared by the tables below; they are inserted between the rows
## of each table through the `add.to.row` argument of print.xtable ().
## All of them assume the 12-column layout used below: one label column plus four
## pairs of model columns, with an empty spacer column between consecutive pairs.

# panel (a) title plus the model numbers (1)-(8)
Header2 <- paste ("\\multicolumn{1}{l}{\\textbf{(a) Overall effect}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} \\\\ \\midrule \n")
# titles of panels (b), (c) and (d); each starts with extra vertical space
Header3 <- paste ("[1.75ex] \\multicolumn{12}{l}{\\textbf{(b) Marginal effects in Concurrent vs. Midterm elections}} \\\\ \\midrule \n")
Header4 <- paste ("[1.75ex] \\multicolumn{12}{l}{\\textbf{(c) Marginal effects in {\\it Conurbano} vs. Interior municipalities}} \\\\ \\midrule \n")
Header5 <- paste ("[1.75ex] \\multicolumn{12}{l}{\\textbf{(d) Marginal effects by council size (as measured in 2011)}} \\\\ \\midrule \n")
# bottom rows: municipality FEs and year FEs in every column; the outcome in the
# primary only in the even-numbered columns
add1 <- paste ("[1.75ex] \\multicolumn{1}{l}{Municipality \\textsc{fe}s} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} \\\\ \n")
add2 <- paste ("\\multicolumn{1}{l}{Year \\textsc{fe}s} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} \\\\ \n")
add3 <- paste ("\\multicolumn{1}{l}{Outcome in primary} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} \\\\ \n")



### (3.3.1) Main, Levels: pct_paso_classif -> pct_general

# getting the coefficients
# The table stacks four panels, each taken from the etable () output of a set of
# models: overall effect, midterm vs. concurrent, Conurbano vs. Interior, and
# council size. Panels (b) and (c) are followed by two hand-made rows: the
# p-value of the interaction term from the "_pval" models, and a label row.
# The result has 31 rows (2 + 4 + 2 + 4 + 2 + 17) and 9 columns (labels + 8 models).
# NOTE(review): models and rows are selected by position. `sample=c (12, 8, 10, 6)`
# presumably picks the levels of `pred_outcome` that correspond to the four column
# groups of the table header (turnout, positive, first two, Golosov), and
# `fixef="year"` the specifications that include the year fixed effects; the row
# indices (4:5, 4:7, c (4:19, 27)) presumably pick the coefficient/standard-error
# rows plus one summary row of the etable () output. Re-check all of them if the
# outcomes, the predictors or the fixest version change.
(tab_bymun_levels <- rbind (
  etable (mod_bymun_full[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:5),] ## digits="r3" asks etable () to round at 3 decimal places ("r" = round, as opposed to "s" = significant digits)
  , etable (mod_bymun_midt[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:7),]
  , c ("$p$-value of the difference", sprintf ("%.3f", unlist (coeftable (mod_bymun_midt_pval[sample=c (12, 8, 10, 6), fixef="year"]) %>% filter (coefficient=="pred_value:midterm") %>% select (`Pr(>|t|)`))))
  , c ("~~~~~~~~~~(concurrent vs. midterm)", rep ("", 8))
  , etable (mod_bymun_conu[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:7),]
  , c ("$p$-value of the difference", sprintf ("%.3f", unlist (coeftable (mod_bymun_conu_pval[sample=c (12, 8, 10, 6), fixef="year"]) %>% filter (coefficient=="pred_value:conurbanoConurbano") %>% select (`Pr(>|t|)`))))
  , c ("~~~~~~~~~~({\\it Conurbano} vs. Interior)", rep ("", 8))
  , etable (mod_bymun_size[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27),]) %>%
  `colnames<-`(c ("...1", colnames(.)[-1])) %>% ## to rename first column
  mutate ( ## updating row names
    `...1` = ifelse ( ## closing parenthesis
      grepl (" x ", `...1`), str_c (`...1`, ")"), `...1`) ## " x " itself becomes " (" in the replacements below
    , `...1` = str_trim (`...1`)
    , `...1` = str_replace_all ( ## variable names
      `...1`, c (
        "pred_value" = "{\\\\it margin}$^\\\\text{P}$"
        , " x " = " ("
        , "midterm = 0" = "concurrent"
        , "midterm = 1" = "midterm"
        , "conurbano = Conurbano" = "{\\\\it Conurbano}"
        , "conurbano = Interior" = "Interior"
        , "concejoSize_2011 =" = "council size ="))) %>% 
  mutate (across ( ## p-values span two rows (their own and the label row underneath)
    .cols = -`...1` ## all columns except the one with the row labels
    , .fns = ~if_else (
      `...1` == "$p$-value of the difference"
      , str_c ("\\multirow{2}{*}{", .x, "}")
      , .x))))

# exporting as a LaTeX table
# Header1: top rule and titles of the four column groups (general-election outcomes)
Header1 <- paste ("\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n")
# Bottom1: bottom rule and table notes
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  The sample is the same in all panels, but panels~(b) through~(d) report {\\it marginal} effects for different subsets of the sample; the ``$p$-value of the difference'' indicates whether these are statistically different from each other.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\")

# Positions (row numbers of the 31-row table) after which each LaTeX snippet is
# inserted; 0 means before the first row. The i-th position goes with the i-th
# element of `command`:
#   0, 0       -> Header1, Header2 (top of the table, panel (a))
#   2, 8, 14   -> Header3, Header4, Header5 (start of panels (b), (c), (d))
#   30, 30, 30 -> add1, add2, add3 (before the last row of the table)
#   31         -> Bottom1 (after the last row)
# These must be updated if the number of rows in any panel changes.
addtorow <- list ()
addtorow$pos <- list ()
addtorow$pos[[1]] <- 0
addtorow$pos[[2]] <- 0
addtorow$pos[[3]] <- 2
addtorow$pos[[4]] <- 8
addtorow$pos[[5]] <- 14
addtorow$pos[[6]] <- 30
addtorow$pos[[7]] <- 30
addtorow$pos[[8]] <- 30
addtorow$pos[[9]] <- 31
addtorow$command <- c (Header1, Header2, Header3, Header4, Header5, add1, add2, add3, Bottom1)
# bind_cols () interleaves empty spacer columns between the four pairs of model
# columns (12 columns in total); `align` has one more entry because xtable also
# expects one for the row names, which are not printed
print (xtable ( bind_cols (tab_bymun_levels[,1:3], "", tab_bymun_levels[,4:5], "", tab_bymun_levels[,6:7], "", tab_bymun_levels[,8:9])
                , align=c ("l","l","c","c","c","c","c","c","c","c","c","c","c")
                , digits=0
                , caption="Between-party closeness in the primary and general election outcomes"
                , label="T:ClosenessLevelsFull")
       , sanitize.text.function=function(x){x} ## cells already contain LaTeX code: print them verbatim
       , floating=TRUE
       , table.placement="t"
       , caption.placement="top" 
       , latex.environments="center"
       , size="footnotesize"
       , include.colnames=FALSE
       , include.rownames=FALSE
       , hline.after = c () ## all rules come from the snippets in addtorow
       , add.to.row=addtorow )



### (3.3.2) Main, Delta: pct_paso_classif -> (pct_general - pct_paso_classif)

# getting the coefficients
# Same structure as the levels table (four stacked panels, 31 rows and 9 columns),
# built from a different set of samples.
# NOTE(review): `sample=c (11, 7, 9, 5)` presumably picks the levels of
# `pred_outcome` that correspond to the first-differenced ("delta") outcomes, in
# the order of the table header (turnout, positive, first two, Golosov). Like the
# row indices (4:5, 4:7, c (4:19, 27)), these positions must be re-checked if the
# outcomes, the predictors or the fixest version change.
(tab_bymun_delta <- rbind (
  etable (mod_bymun_full[sample=c (11, 7, 9, 5), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:5),]
  , etable (mod_bymun_midt[sample=c (11, 7, 9, 5), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:7),]
  , c ("$p$-value of the difference", sprintf ("%.3f", unlist (coeftable (mod_bymun_midt_pval[sample=c (11, 7, 9, 5), fixef="year"]) %>% filter (coefficient=="pred_value:midterm") %>% select (`Pr(>|t|)`))))
  , c ("~~~~~~~~~~(concurrent vs. midterm)", rep ("", 8))
  , etable (mod_bymun_conu[sample=c (11, 7, 9, 5), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:7),]
  , c ("$p$-value of the difference", sprintf ("%.3f", unlist (coeftable (mod_bymun_conu_pval[sample=c (11, 7, 9, 5), fixef="year"]) %>% filter (coefficient=="pred_value:conurbanoConurbano") %>% select (`Pr(>|t|)`))))
  , c ("~~~~~~~~~~({\\it Conurbano} vs. Interior)", rep ("", 8))
  , etable (mod_bymun_size[sample=c (11, 7, 9, 5), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27),]) %>%
    `colnames<-`(c ("...1", colnames(.)[-1])) %>% ## to rename first column
    mutate ( ## updating row names
      `...1` = ifelse ( ## closing parenthesis
        grepl (" x ", `...1`), str_c (`...1`, ")"), `...1`) ## " x " itself becomes " (" in the replacements below
      , `...1` = str_trim (`...1`)
      , `...1` = str_replace_all ( ## variable names
        `...1`, c (
          "pred_value" = "{\\\\it margin}$^\\\\text{P}$"
          , " x " = " ("
          , "midterm = 0" = "concurrent"
          , "midterm = 1" = "midterm"
          , "conurbano = Conurbano" = "{\\\\it Conurbano}"
          , "conurbano = Interior" = "Interior"
          , "concejoSize_2011 =" = "council size ="))) %>% 
    mutate (across ( ## p-values span two rows (their own and the label row underneath)
      .cols = -`...1` ## all columns except the one with the row labels
      , .fns = ~if_else (
        `...1` == "$p$-value of the difference"
        , str_c ("\\multirow{2}{*}{", .x, "}")
        , .x))))

# exporting as a LaTeX table
# Header1 and Bottom1 are redefined here for this table: the column titles and
# the notes now refer to the first-differenced outcomes
Header1 <- paste ("\\toprule & \\multicolumn{2}{c}{$\\Delta$ {\\it turnout}$^{\\text{G}-\\text{P}}$} & & \\multicolumn{2}{c}{$\\Delta$ {\\it positive}$^{\\text{G}-\\text{P}}$} & & \\multicolumn{2}{c}{$\\Delta$ {\\it first two}$^{\\text{G}-\\text{P}}$} & & \\multicolumn{2}{c}{$\\Delta$ {\\it Golosov}$^{\\text{G}-\\text{P}}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n")
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.75cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  Outcomes are measured as the difference between the value measured in the general election and the one measured in the primary.
  The sample is the same in all panels, but panels~(b) through~(d) report {\\it marginal} effects for different subsections of the sample; the ``$p$-value of the difference'' indicates whether these are statistically different from each other.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\")

# Positions (row numbers of the 31-row table) after which each LaTeX snippet is
# inserted; 0 means before the first row. The i-th position goes with the i-th
# element of `command`:
#   0, 0       -> Header1, Header2 (top of the table, panel (a))
#   2, 8, 14   -> Header3, Header4, Header5 (start of panels (b), (c), (d))
#   30, 30, 30 -> add1, add2, add3 (before the last row of the table)
#   31         -> Bottom1 (after the last row)
# These must be updated if the number of rows in any panel changes.
addtorow <- list ()
addtorow$pos <- list ()
addtorow$pos[[1]] <- 0
addtorow$pos[[2]] <- 0
addtorow$pos[[3]] <- 2
addtorow$pos[[4]] <- 8
addtorow$pos[[5]] <- 14
addtorow$pos[[6]] <- 30
addtorow$pos[[7]] <- 30
addtorow$pos[[8]] <- 30
addtorow$pos[[9]] <- 31
addtorow$command <- c (Header1, Header2, Header3, Header4, Header5, add1, add2, add3, Bottom1)
# bind_cols () interleaves empty spacer columns between the four pairs of model
# columns (12 columns in total); `align` has one more entry because xtable also
# expects one for the row names, which are not printed
print (xtable ( bind_cols (tab_bymun_delta[,1:3], "", tab_bymun_delta[,4:5], "", tab_bymun_delta[,6:7], "", tab_bymun_delta[,8:9])
                , align=c ("l","l","c","c","c","c","c","c","c","c","c","c","c")
                , digits=0
                , caption="Between-party closeness in the primary and first-differenced outcomes"
                , label="T:ClosenessDelta")
       , sanitize.text.function=function(x){x} ## cells already contain LaTeX code: print them verbatim
       , floating=TRUE
       , table.placement="t"
       , caption.placement="top" 
       , latex.environments="center"
       , size="footnotesize"
       , include.colnames=FALSE
       , include.rownames=FALSE
       , hline.after = c () ## all rules come from the snippets in addtorow
       , add.to.row=addtorow )



### (3.3.3) Main, Levels, all parties in the denominator: pct_paso -> pct_general

# getting the coefficients
## Stacks selected etable rows from the mod_bymun_full / _midt / _conu / _size models,
## adding after the midterm and Conurbano blocks a row with the p-value of the interaction
## term (from the matching *_pval models) and a sub-label row. The result is then printed.
tab_bymun_quali <- rbind (
  etable (mod_bymun_full[sample=c (20, 16, 18, 14), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:5, ]
  , etable (mod_bymun_midt[sample=c (20, 16, 18, 14), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_midt_pval[sample=c (20, 16, 18, 14), fixef="year"]) %>%
         filter (coefficient=="pred_value:midterm") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~(concurrent vs. midterm)", rep ("", 8))
  , etable (mod_bymun_conu[sample=c (20, 16, 18, 14), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_conu_pval[sample=c (20, 16, 18, 14), fixef="year"]) %>%
         filter (coefficient=="pred_value:conurbanoConurbano") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~({\\it Conurbano} vs. Interior)", rep ("", 8))
  , etable (mod_bymun_size[sample=c (20, 16, 18, 14), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27), ]) %>%
  setNames (c ("...1", names (.)[-1])) %>% ## the first column (row labels) is named `...1`
  mutate (`...1` = ifelse (grepl (" x ", `...1`), str_c (`...1`, ")"), `...1`)) %>% ## interaction labels get a closing parenthesis
  mutate (`...1` = str_trim (`...1`)) %>%
  mutate (`...1` = str_replace_all (`...1`, c ( ## raw variable names -> LaTeX labels, applied in this order
    "pred_value" = "{\\\\it margin}$^\\\\text{P}$"
    , " x " = " ("
    , "midterm = 0" = "concurrent"
    , "midterm = 1" = "midterm"
    , "conurbano = Conurbano" = "{\\\\it Conurbano}"
    , "conurbano = Interior" = "Interior"
    , "concejoSize_2011 =" = "council size ="))) %>%
  mutate (across ( ## in the p-value rows, wrap every cell but the label in \multirow{2}
    .cols = -`...1`
    , .fns = ~if_else (
      `...1` == "$p$-value of the difference"
      , str_c ("\\multirow{2}{*}{", .x, "}")
      , .x)))
tab_bymun_quali

# exporting as a LaTeX table
## Header1: top rule plus the spanning labels of the four general-election outcomes
Header1 <- "\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
## Bottom1: bottom rule plus the table notes
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, but unlike the case of Tables~\\ref{T:ClosenessLevelsShort} and~\\ref{T:ClosenessLevelsFull}, all parties that participated in the primary are included in the denominator.
  The sample is the same in all panels, but panels~(b) through~(d) report {\\it marginal} effects for different subsets of the sample; the ``$p$-value of the difference'' indicates whether these are statistically different from each other.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
## Header2-Header5 and add1-add3 are not assigned in this section; they must already exist in the workspace
addtorow <- list (
  pos = list (0, 0, 2, 8, 14, 30, 30, 30, 31)
  , command = c (Header1, Header2, Header3, Header4, Header5, add1, add2, add3, Bottom1))

## empty-string columns are inserted between the outcome pairs, giving 12 columns in total
bind_cols (tab_bymun_quali[,1:3], "", tab_bymun_quali[,4:5], "", tab_bymun_quali[,6:7], "", tab_bymun_quali[,8:9]) %>%
  xtable (align=c ("l", "l", rep ("c", 11))
          , digits=0
          , caption="Between-party closeness in the primary and general election outcomes --Including all parties in the denominator"
          , label="T:ClosenessQuali") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.4) Main, Logs: log (paso_A/paso_B) -> log(general)

# getting the coefficients
## Stacks selected etable rows from the mod_bymun_full / _midt / _conu / _size models
## (samples 4, 2, 3, 1), adding after the midterm and Conurbano blocks a row with the
## p-value of the interaction term and a sub-label row. The result is then printed.
tab_bymun_logs <- rbind (
  etable (mod_bymun_full[sample=c (4, 2, 3, 1), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:5, ]
  , etable (mod_bymun_midt[sample=c (4, 2, 3, 1), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_midt_pval[sample=c (4, 2, 3, 1), fixef="year"]) %>%
         filter (coefficient=="pred_value:midterm") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~(concurrent vs. midterm)", rep ("", 8))
  , etable (mod_bymun_conu[sample=c (4, 2, 3, 1), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_conu_pval[sample=c (4, 2, 3, 1), fixef="year"]) %>%
         filter (coefficient=="pred_value:conurbanoConurbano") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~({\\it Conurbano} vs. Interior)", rep ("", 8))
  , etable (mod_bymun_size[sample=c (4, 2, 3, 1), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27), ]) %>%
  setNames (c ("...1", names (.)[-1])) %>% ## the first column (row labels) is named `...1`
  mutate (`...1` = ifelse (grepl (" x ", `...1`), str_c (`...1`, ")"), `...1`)) %>% ## interaction labels get a closing parenthesis
  mutate (`...1` = str_trim (`...1`)) %>%
  mutate (`...1` = str_replace_all (`...1`, c ( ## raw variable names -> LaTeX labels, applied in this order
    "pred_value" = "$\\\\log$ ({\\\\it margin}$^\\\\text{P}$)"
    , " x " = " ("
    , "midterm = 0" = "concurrent"
    , "midterm = 1" = "midterm"
    , "conurbano = Conurbano" = "{\\\\it Conurbano}"
    , "conurbano = Interior" = "Interior"
    , "concejoSize_2011 =" = "council size ="))) %>%
  mutate (across ( ## in the p-value rows, wrap every cell but the label in \multirow{2}
    .cols = -`...1`
    , .fns = ~if_else (
      `...1` == "$p$-value of the difference"
      , str_c ("\\multirow{2}{*}{", .x, "}")
      , .x)))
tab_bymun_logs

# exporting as a LaTeX table
## Header1: top rule plus the spanning labels of the four logged general-election outcomes
Header1 <- "\\toprule & \\multicolumn{2}{c}{$\\log(${\\it turnout}$^\\text{G}$$)$} & & \\multicolumn{2}{c}{$\\log(${\\it positive}$^\\text{G}$$)$} & & \\multicolumn{2}{c}{$\\log(${\\it first two}$^\\text{G}$$)$} & & \\multicolumn{2}{c}{$\\log(${\\it Golosov}$^\\text{G}$$)$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
## Bottom1: bottom rule plus the table notes
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.65cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The (logged) outcome is always measured in the general election.
  $\\log$({\\it margin}$^{\\text{P}}$) is the difference between the logged vote total of the leading party minus the logged vote total of the trailing party in the primary election.
  When calculating the (pre-logged) primary outcome control in column~(8), only parties that classified to the general election are included in the denominator.
  The sample is the same in all panels, but panels~(b) through~(d) report {\\it marginal} effects for different subsets of the sample; the ``$p$-value of the difference'' indicates whether these are statistically different from each other.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
## Header2-Header5 and add1-add3 are not assigned in this section; they must already exist in the workspace
addtorow <- list (
  pos = list (0, 0, 2, 8, 14, 30, 30, 30, 31)
  , command = c (Header1, Header2, Header3, Header4, Header5, add1, add2, add3, Bottom1))

## empty-string columns are inserted between the outcome pairs, giving 12 columns in total
## (caption spelling fixed: "Betwen-party" -> "Between-party", as in the other tables)
bind_cols (tab_bymun_logs[,1:3], "", tab_bymun_logs[,4:5], "", tab_bymun_logs[,6:7], "", tab_bymun_logs[,8:9]) %>%
  xtable (align=c ("l", "l", rep ("c", 11))
          , digits=0
          , caption="Between-party closeness in the primary and general election outcomes --in logs of absolute values"
          , label="T:ClosenessLogs") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.5) Main, Levels: pct_paso_classif -> pct_general (standardized)

# getting the coefficients
## Stacks selected etable rows from the standardized (*_std) models, adding after the
## midterm and Conurbano blocks a row with the p-value of the interaction term (from the
## matching *_std_pval models) and a sub-label row. The result is then printed.
tab_bymun_levels_std <- rbind (
  etable (mod_bymun_full_std[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:5, ]
  , etable (mod_bymun_midt_std[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_midt_std_pval[sample=c (12, 8, 10, 6), fixef="year"]) %>%
         filter (coefficient=="pred_value_std:midterm") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~(concurrent vs. midterm)", rep ("", 8))
  , etable (mod_bymun_conu_std[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_conu_std_pval[sample=c (12, 8, 10, 6), fixef="year"]) %>%
         filter (coefficient=="pred_value_std:conurbanoConurbano") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~({\\it Conurbano} vs. Interior)", rep ("", 8))
  , etable (mod_bymun_size_std[sample=c (12, 8, 10, 6), fixef="year"], se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27), ]) %>%
  setNames (c ("...1", names (.)[-1])) %>% ## the first column (row labels) is named `...1`
  mutate (`...1` = ifelse (grepl (" x ", `...1`), str_c (`...1`, ")"), `...1`)) %>% ## interaction labels get a closing parenthesis
  mutate (`...1` = str_trim (`...1`)) %>%
  mutate (`...1` = str_replace_all (`...1`, c ( ## raw variable names -> LaTeX labels, applied in this order
    "pred_value_std" = "{\\\\it margin}$^\\\\text{P}_{\\\\text{\\\\textsc{std}}}$"
    , " x " = " ("
    , "midterm = 0" = "concurrent"
    , "midterm = 1" = "midterm"
    , "conurbano = Conurbano" = "{\\\\it Conurbano}"
    , "conurbano = Interior" = "Interior"
    , "concejoSize_2011 =" = "council size ="))) %>%
  mutate (across ( ## in the p-value rows, wrap every cell but the label in \multirow{2}
    .cols = -`...1`
    , .fns = ~if_else (
      `...1` == "$p$-value of the difference"
      , str_c ("\\multirow{2}{*}{", .x, "}")
      , .x)))
tab_bymun_levels_std

# exporting as a LaTeX table
## Header1: top rule plus the spanning labels of the four standardized outcomes
Header1 <- "\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}_{\\text{\\textsc{std}}}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}_{\\text{\\textsc{std}}}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}_{\\text{\\textsc{std}}}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}_{\\text{\\textsc{std}}}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
## Bottom1: bottom rule plus the table notes
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  All variables are normalized using their within-municipality standard deviation.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  The sample is the same in all panels, but panels~(b) through~(d) report {\\it marginal} effects for different subsets of the sample; the ``$p$-value of the difference'' indicates whether these are statistically different from each other.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
## Header2-Header5 and add1-add3 are not assigned in this section; they must already exist in the workspace
addtorow <- list (
  pos = list (0, 0, 2, 8, 14, 30, 30, 30, 31)
  , command = c (Header1, Header2, Header3, Header4, Header5, add1, add2, add3, Bottom1))

## empty-string columns are inserted between the outcome pairs, giving 12 columns in total
bind_cols (tab_bymun_levels_std[,1:3], "", tab_bymun_levels_std[,4:5], "", tab_bymun_levels_std[,6:7], "", tab_bymun_levels_std[,8:9]) %>%
  xtable (align=c ("l", "l", rep ("c", 11))
          , digits=0
          , caption="Between-party closeness in the primary and general election outcomes --Standardized estimates"
          , label="T:ClosenessStd") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.6) Predicting outcome in G_t with P_t vs G_t-1 or G_t-2 (in levels)

# getting the coefficients
## Four stacked blocks of etable rows from mod_bymun_lag: rhs 2, 3 and 4 contribute four
## rows each, rhs 8 contributes eight. The row labels (column `...1`) are then swapped for
## LaTeX labels, and the finished table is printed.
(tab_bymun_lags <- bind_rows (
  etable (mod_bymun_lag[rhs=2], se.below=TRUE, signif.code=NA, digits=2, digits.stats=2)[c (6:7, 13:14), ]
  , etable (mod_bymun_lag[rhs=3], se.below=TRUE, signif.code=NA, digits=2, digits.stats=2)[c (6:7, 13:14), ]
  , etable (mod_bymun_lag[rhs=4], se.below=TRUE, signif.code=NA, digits=2, digits.stats=2)[c (6:7, 13:14), ]
  , etable (mod_bymun_lag[rhs=8], se.below=TRUE, signif.code=NA, digits=2, digits.stats=2)[c (6:11, 17:18), ]) %>%
  mutate (`...1` = case_when ( ## exact-match relabelling; anything else is kept as is
    `...1` == "out_value_paso" ~ "lagged \\textsc{dv}$^{P}$"
    , `...1` == "out_value_lag_t1" ~ "lagged \\textsc{dv}$^{G-2}$"
    , `...1` == "out_value_lag_t2" ~ "lagged \\textsc{dv}$^{G-4}$"
    , `...1` == "Observations" ~ "[0.5ex] Observations"
    , TRUE ~ `...1`)) %>%
  mutate (`...1` = str_replace_all (`...1`, c ("R2" = "$R^2$")))) ## any "R2" inside a label becomes math-mode R^2

# exporting as a LaTeX table
## NOTE: the panel headers and fixed-effects rows below are specific to this 24-column table,
## so they get a "_lag" suffix. Assigning them to Header3-Header5 / add1 / add2 would overwrite
## the versions defined earlier in the script, which the tables exported further down still
## reference without redefining them.
## Header1: top rule plus the spanning labels of the six outcomes (three columns each)
Header1 <- "\\toprule & \\multicolumn{3}{c}{\\% {\\it margin 1 vs. 2}$^\\text{G}$} & & \\multicolumn{3}{c}{\\% {\\it margin 2 vs. 3}$^\\text{G}$} & & \\multicolumn{3}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{3}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{3}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{3}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-4} \\cmidrule{6-8} \\cmidrule{10-12} \\cmidrule{14-16} \\cmidrule{18-20} \\cmidrule{22-24} \n"
## Header2: title of panel (a) plus the column numbers (1)-(18)
Header2 <- "\\multicolumn{1}{l}{\\textbf{(a) Lag, primary}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & \\multicolumn{1}{c}{(3)} & & \\multicolumn{1}{c}{(4)} & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} & \\multicolumn{1}{c}{(9)} & & \\multicolumn{1}{c}{(10)} & \\multicolumn{1}{c}{(11)} & \\multicolumn{1}{c}{(12)} & & \\multicolumn{1}{c}{(13)} & \\multicolumn{1}{c}{(14)} & \\multicolumn{1}{c}{(15)} & & \\multicolumn{1}{c}{(16)} & \\multicolumn{1}{c}{(17)} & \\multicolumn{1}{c}{(18)} \\\\ \\midrule \n"
## titles of panels (b)-(d)
Header3_lag <- "[2.5ex] \\multicolumn{23}{l}{\\textbf{(b) Lag from previous general election} ($t-2$)} \\\\ \\midrule \n"
Header4_lag <- "[2.5ex] \\multicolumn{23}{l}{\\textbf{(c) Lag from previous concurrent or midterm general election} ($t-4$)} \\\\ \\midrule \n"
Header5_lag <- "[2.5ex] \\multicolumn{23}{l}{\\textbf{(d) All three lags simultaneously}} \\\\ \\midrule \n"
## fixed-effects indicator rows (18 model columns)
add1_lag <- "[2.5ex] \\multicolumn{1}{l}{Municipality \\textsc{fe}s} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & \\multicolumn{1}{c}{\\textsc{y}} \\\\ \n"
add2_lag <- "\\multicolumn{1}{l}{Year \\textsc{fe}s} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} & & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{n}} & \\multicolumn{1}{c}{\\textsc{y}} \\\\ \n"
## Bottom1: bottom rule plus the table notes
Bottom1 <- "\\bottomrule \\multicolumn{24}{l}{
  \\begin{minipage}{22.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  The explanatory variables consist of the same indicator, but measured either (a) in the primary election; (b) in the previous general election, two years before; or (c) in the general election four years before (so that concurrency status is kept unchanged).
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
addtorow <- list (
  pos = list (0, 0, 4, 8, 12, 20, 20, 20)
  , command = c (Header1, Header2, Header3_lag, Header4_lag, Header5_lag, add1_lag, add2_lag, Bottom1))

## empty-string columns are inserted between the six outcome triplets, giving 24 columns in total
bind_cols (tab_bymun_lags[,1:4], "", tab_bymun_lags[,5:7], "", tab_bymun_lags[,8:10], "", tab_bymun_lags[,11:13], "", tab_bymun_lags[,14:16], "", tab_bymun_lags[,17:19]) %>%
  xtable (align=c ("l", "l", rep ("c", 23)) ## 24 table columns plus the (hidden) row-name slot
          , digits=0
          , caption="Comparing the predictive power of the primary vs. previous general elections"
          , label="T:DescLags") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.7) Which elections? "Horse race" between local, provincial and national results

# getting the coefficients
## Stacks etable rows 4-5 of mod_bymun_full_horse with rows 4-9 of three consecutive groups
## of eight mod_bymun_horse models (the last group also keeps row 17), names the label column
## expl_var and swaps the raw variable names for LaTeX labels. The result is then printed.
## digits="r3" asks etable to round to 3 decimal places.
tab_bymun_horse <- rbind (
  etable (mod_bymun_full_horse[i=1:8], se.below=TRUE, signif.code=NA, digits="r3")[4:5, ]
  , etable (mod_bymun_horse[i=1:8], se.below=TRUE, signif.code=NA, digits="r3")[4:9, ]
  , etable (mod_bymun_horse[i=9:16], se.below=TRUE, signif.code=NA, digits="r3")[4:9, ]
  , etable (mod_bymun_horse[i=17:24], se.below=TRUE, signif.code=NA, digits="r3")[c (4:9, 17), ]) %>%
  setNames (c ("...1", names (.)[-1])) %>%
  rename (expl_var = 1) %>%
  mutate (expl_var = str_replace_all (expl_var, c ( ## applied in this order
    "pred_value" = "{\\\\it margin}$^\\\\text{P}$"
    , "margin12_local_paso" = "{\\\\it margin}$^\\\\text{P}_{\\\\text{municipal}}$"
    , "margin12_provincial_paso" = "{\\\\it margin}$^\\\\text{P}_{\\\\text{provincial}}$"
    , "margin12_national_paso" = "{\\\\it margin}$^\\\\text{P}_{\\\\text{national}}$")))
tab_bymun_horse
    
# exporting as a LaTeX table
## Header1: top rule plus the spanning labels of the four general-election outcomes
Header1 <- "\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
## Header2: title of panel (a) plus the column numbers (1)-(8)
Header2 <- "\\multicolumn{1}{l}{\\textbf{(a) Original results (2013-23)}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} \\\\ \\midrule \n"
## titles of panels (b)-(d); the "_hr" suffix keeps them apart from Header3-Header5
Header3_hr <- "[1.75ex] \\multicolumn{12}{l}{\\textbf{(b)} Outcome measured in \\textbf{\\textsc{municipal}} elections} \\\\ \\midrule \n"
Header4_hr <- "[1.75ex] \\multicolumn{12}{l}{\\textbf{(c)} Outcome measured in \\textbf{\\textsc{provincial}} elections} \\\\ \\midrule \n"
Header5_hr <- "[1.75ex] \\multicolumn{12}{l}{\\textbf{(d)} Outcome measured in \\textbf{\\textsc{national}} elections} \\\\ \\midrule \n"
## Bottom1: bottom rule plus the table notes
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  Since primary results for the municipal races were not available for 2011, the top panel employs the same data as Table~\\ref{T:ClosenessLevelsShort}(a), but for 2013-2023 only.
  The next three panels employ provisional results aggregated from precinct-level data, measured separately for municipal, provincial and national elections. %The results for concurrent years look at executive elections (mayors, governor and president) are used; those for midterm years, at legislative elections (councilors, provincial senators and deputies, national deputies).
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
## add1-add3 are not assigned in this section; they must already exist in the workspace
addtorow <- list (
  pos = list (0, 0, 2, 8, 14, 20, 20, 20, 21)
  , command = c (Header1, Header2, Header3_hr, Header4_hr, Header5_hr
                 , add1, add2, add3, Bottom1))

## empty-string columns are inserted between the outcome pairs, giving 12 columns in total
bind_cols (tab_bymun_horse[,1:3], "", tab_bymun_horse[,4:5], "", tab_bymun_horse[,6:7], "", tab_bymun_horse[,8:9]) %>%
  xtable (align=c ("l", "l", rep ("c", 11))
          , digits=0
          , caption="Between-party closeness in the primary and general election outcomes --``Horse race'' between variables measured at the municipal, provincial and national levels"
          , label="T:ClosenessHorseRace") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.8) Intra-party competition (I): Alternative configurations

# getting the coefficients
## Stacks etable rows 4-11 of models 1-8 and rows 4-11 plus row 19 of models 9-16 of
## mod_bymun_primary, names the label column expl_var and swaps the raw interaction names
## for LaTeX labels. The result is then printed.
tab_bymun_primary <- rbind (
  etable (mod_bymun_primary[i=1:8], se.below=TRUE, signif.code=NA, digits="r3")[4:11, ]
  , etable (mod_bymun_primary[i=9:16], se.below=TRUE, signif.code=NA, digits="r3")[c (4:11, 19), ]) %>%
  setNames (c ("...1", names (.)[-1])) %>%
  rename (expl_var = 1) %>%
  mutate (expl_var = str_replace_all (expl_var, c ( ## applied in this order
    "pred_value x primary_comp_configuration = neither" = "{\\\\it margin}$^\\\\text{P}$ (neither)"
    , "pred_value x primary_comp_configuration = first" = "{\\\\it margin}$^\\\\text{P}$ (first-placed only)"
    , "pred_value x primary_comp_configuration = second" = "{\\\\it margin}$^\\\\text{P}$ (second-placed only)"
    , "pred_value x primary_comp_configuration = both" = "{\\\\it margin}$^\\\\text{P}$ (both)")))
tab_bymun_primary

# exporting as a LaTeX table
## Header1: top rule plus the spanning labels of the four general-election outcomes
Header1 <- "\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
## Header2: title of panel (a) plus the column numbers (1)-(8)
Header2 <- "\\multicolumn{1}{l}{\\textbf{(a) Single vs. multiple lists}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} \\\\ \\midrule \n"
## title of panel (b); kept under its own name so that the global Header3, which a later
## section of this script uses as its own panel-(b) header, is not overwritten
Header3_primary <- "[1.75ex] \\multicolumn{12}{l}{\\textbf{(b) Counting multiple lists only if the margin between the two largest ones is lower than 25~pp.}} \\\\ \\midrule \n"
## Bottom1: bottom rule plus the table notes (grammar fixed: "Estimates report", "faction wins")
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  Estimates report separate {\\it marginal} effects depending on whether (i) neither of the two largest parties in the primary; (ii) only the largest party in the primary; (iii) only the second largest party in the primary; or (iv) the two largest parties in the primary featured multiple lists, respectively.
  In panel~(a) all instances of intra-party competition are treated as such; in panel~(b), only cases in which the largest faction wins by a margin of 25~percentage points or lower are treated as instances of intra-party competition.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
## add1-add3 are not assigned in this section; they must already exist in the workspace
addtorow <- list (
  pos = list (0, 0, 8, 16, 16, 16, 17)
  , command = c (Header1, Header2, Header3_primary, add1, add2, add3, Bottom1))

## empty-string columns are inserted between the outcome pairs, giving 12 columns in total
bind_cols (tab_bymun_primary[,1:3], "", tab_bymun_primary[,4:5], "", tab_bymun_primary[,6:7], "", tab_bymun_primary[,8:9]) %>%
  xtable (align=c ("l", "l", rep ("c", 11))
          , digits=0
          , caption="Between-party closeness in the primary and general election outcomes --Heterogeneity depending on which of the two-placed parties faced a competitive primary"
          , label="T:ClosenessIntraHet") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.9) Intra-party competition (II): largest faction only

# getting the coefficients
## Stacks selected etable rows from the *_primary models (reordered as 7:8, 3:4, 5:6, 1:2),
## adding after the midterm and Conurbano blocks a row with the p-value of the interaction
## term (from the matching *_primary_pval models) and a sub-label row. Then prints the result.
## NOTE(review): the etable calls index models with i=, the p-value calls with sample=c(4, 2, 3, 1);
## presumably both give the same column order -- confirm against the model definitions.
tab_bymun_levels_primary <- rbind (
  etable (mod_bymun_full_primary[i=c (7:8, 3:4, 5:6, 1:2)], se.below=TRUE, signif.code=NA, digits="r3")[4:5, ]
  , etable (mod_bymun_midt_primary[i=c (7:8, 3:4, 5:6, 1:2)], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_midt_primary_pval[sample=c(4, 2, 3, 1)]) %>%
         filter (coefficient=="pred_value_primary_abs:midterm") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~(concurrent vs. midterm)", rep ("", 8))
  , etable (mod_bymun_conu_primary[i=c (7:8, 3:4, 5:6, 1:2)], se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , c ("$p$-value of the difference"
       , coeftable (mod_bymun_conu_primary_pval[sample=c(4, 2, 3, 1)]) %>%
         filter (coefficient=="pred_value_primary_abs:conurbanoConurbano") %>%
         select (`Pr(>|t|)`) %>%
         unlist () %>%
         sprintf ("%.3f", .))
  , c ("~~~~~~~~~~({\\it Conurbano} vs. Interior)", rep ("", 8))
  , etable (mod_bymun_size_primary[i=c (7:8, 3:4, 5:6, 1:2)], se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27), ]) %>%
  setNames (c ("...1", names (.)[-1])) %>% ## the first column (row labels) is named `...1`
  mutate (`...1` = ifelse (grepl (" x ", `...1`), str_c (`...1`, ")"), `...1`)) %>% ## interaction labels get a closing parenthesis
  mutate (`...1` = str_trim (`...1`)) %>%
  mutate (`...1` = str_replace_all (`...1`, c ( ## raw variable names -> LaTeX labels, applied in this order
    "pred_value_primary_abs" = "{\\\\it margin}$^\\\\text{P}$"
    , " x " = " ("
    , "midterm = 0" = "concurrent"
    , "midterm = 1" = "midterm"
    , "conurbano = Conurbano" = "{\\\\it Conurbano}"
    , "conurbano = Interior" = "Interior"
    , "concejoSize_2011 =" = "council size ="))) %>%
  mutate (across ( ## in the p-value rows, wrap every cell but the label in \multirow{2}
    .cols = -`...1`
    , .fns = ~if_else (
      `...1` == "$p$-value of the difference"
      , str_c ("\\multirow{2}{*}{", .x, "}")
      , .x)))
tab_bymun_levels_primary

# exporting as a LaTeX table
## Header1: top rule plus the spanning labels of the four general-election outcomes
Header1 <- "\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
## Header2: title of panel (a) plus the column numbers (1)-(8)
Header2 <- "\\multicolumn{1}{l}{\\textbf{(a) Overall effect}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} \\\\ \\midrule \n"
## Bottom1: bottom rule plus the table notes
Bottom1 <- "\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the absolute value of the difference between the \\% of votes of the largest faction of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  The sample is the same in all panels, but panels~(b) through~(d) report {\\it marginal} effects for different subsets of the sample; the ``$p$-value of the difference'' indicates whether these are statistically different from each other.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\"

## add.to.row specification for print.xtable: the i-th command is paired with the i-th position
## NOTE(review): Header3-Header5 and add1-add3 are not assigned in this section, so the table
## uses whatever those globals last held -- make sure no earlier section has overwritten them
## with snippets meant for a different table layout.
addtorow <- list (
  pos = list (0, 0, 2, 8, 14, 30, 30, 30, 31)
  , command = c (Header1, Header2, Header3, Header4, Header5, add1, add2, add3, Bottom1))

## empty-string columns are inserted between the outcome pairs, giving 12 columns in total
bind_cols (tab_bymun_levels_primary[,1:3], "", tab_bymun_levels_primary[,4:5], "", tab_bymun_levels_primary[,6:7], "", tab_bymun_levels_primary[,8:9]) %>%
  xtable (align=c ("l", "l", rep ("c", 11))
          , digits=0
          , caption="Between-party closeness in the primary and general election outcomes --Calculating margin using largest faction only"
          , label="T:ClosenessIntraLargest") %>%
  print (sanitize.text.function=identity ## cells already contain LaTeX, so leave them untouched
         , floating=TRUE
         , table.placement="t"
         , caption.placement="top"
         , latex.environments="center"
         , size="footnotesize"
         , include.colnames=FALSE
         , include.rownames=FALSE
         , hline.after=NULL ## all rules come from the add.to.row commands
         , add.to.row=addtorow)



### (3.3.10) Heterogeneity by distance to council majority

# stacking the coefficient rows of the two sets of models and relabelling
# the explanatory variables with their LaTeX names
(tab_bymun_majority <- rbind (
  etable (
    mod_bymun_maj[i=1:8]
    , se.below=TRUE, signif.code=NA, digits="r3")[4:7, ]
  , etable (
    mod_bymun_maj_cale[i=1:8]
    , se.below=TRUE, signif.code=NA, digits="r3")[c (4:11, 19), ]) %>%
    rename (expl_var = 1) %>% ## first column holds the row labels
    mutate (expl_var = str_replace_all (
      expl_var
      , c (
        "pred_value x category_maj_simp = incnear" = "{\\\\it margin}$^\\\\text{P}$ (near majority)"
        , "pred_value x category_maj_simp = incfar" = "{\\\\it margin}$^\\\\text{P}$ (away from majority)"
        , "incnear x pred_value x category_maj_cale = concurrent" = "{\\\\it margin}$^\\\\text{P}$ (concurrent, near)"
        , "incfar x pred_value x category_maj_cale = concurrent" = "{\\\\it margin}$^\\\\text{P}$ (concurrent, away)"
        , "incnear x pred_value x category_maj_cale = midterm" = "{\\\\it margin}$^\\\\text{P}$ (midterm, near)"
        , "incfar x pred_value x category_maj_cale = midterm" = "{\\\\it margin}$^\\\\text{P}$ (midterm, away)"))))

# exporting as a LaTeX table
# Header rows, panel (b) title and table note are injected through xtable's
# add.to.row mechanism; add1-add3 are taken from the workspace as they stand.
# The table note below corrects typos and grammar of the earlier wording
# ("thre previous", "electios", "comfortably majority", "either ... and").
Header1 <- paste ("\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n")
Header2 <- paste ("\\multicolumn{1}{l}{\\textbf{(a) Distance to majority}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} \\\\ \\midrule \n")
Header3 <- paste ("[1.75ex] \\multicolumn{12}{l}{\\textbf{(b) Marginal effects by distance to majority in Concurrent vs. Midterm elections}} \\\\ \\midrule \n")
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{14.5cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each panel-column combination reports a different specification.
  The outcome is always measured in the general election.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  Distance to a majority in the council is always measured for the party expected to be the incumbent after the general election. In concurrent elections, this is the most voted party in the primary; in midterm elections, this is the party that won the previous mayoral election.
  Due to data limitations, the sample is restricted to cases where either the \\textsc{pj} or the \\textsc{ucr} is, or is expected to be, the incumbent.
  The incumbent party is coded as being ``near'' to a council majority if the number of seats obtained in the previous general elections plus the number of seats expected on the basis of the primary results add up to 50\\% of seats in the council, or 50\\% minus one seat. In all other cases, the incumbent is coded as ``away'' from a majority --it either has a very comfortable majority that is not at risk, or it is far away from obtaining one.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\")

# positions (0 = before the first row) at which each command is inserted;
# pos[[k]] pairs with command[k]
addtorow <- list ()
addtorow$pos <- list ()
addtorow$pos[[1]] <- 0  ## Header1
addtorow$pos[[2]] <- 0  ## Header2
addtorow$pos[[3]] <- 4  ## Header3: panel (b) title after the 4 rows of panel (a)
addtorow$pos[[4]] <- 12 ## add1
addtorow$pos[[5]] <- 12 ## add2
addtorow$pos[[6]] <- 12 ## add3
addtorow$pos[[7]] <- 13 ## Bottom1: table note after the last row
addtorow$command <- c (Header1, Header2, Header3, add1, add2, add3, Bottom1)

# empty columns ("") are inserted between the four pairs of outcome columns
print (xtable ( bind_cols (tab_bymun_majority[,1:3], "", tab_bymun_majority[,4:5], "", tab_bymun_majority[,6:7], "", tab_bymun_majority[,8:9])
                , align=c ("l","l","c","c","c","c","c","c","c","c","c","c","c")
                , digits=0
                , caption="Between-party closeness in the primary and general election outcomes --Heterogeneity by distance to council majority"
                , label="T:ClosenessMajority")
       , sanitize.text.function=function(x){x} ## cells already contain LaTeX markup
       , floating=TRUE
       , table.placement="t"
       , caption.placement="top" 
       , latex.environments="center"
       , size="footnotesize"
       , include.colnames=FALSE
       , include.rownames=FALSE
       , hline.after = c () ## all rules come from add.to.row
       , add.to.row=addtorow )



### (3.3.11) midterm X district magnitude

# pulling the coefficient rows and turning the row labels into LaTeX
(tab_bymun_midt_size <- rbind (
  etable (
    mod_bymun_midt_size
    , se.below=TRUE, signif.code=NA, digits="r3")[c (4:19, 27), ]) %>%
    setNames (c ("...1", names (.)[-1])) %>% ## first column is called `...1`
    mutate (
      ## interaction labels get a closing parenthesis; the opening one
      ## replaces " x " below
      `...1` = ifelse (
        grepl (" x ", `...1`, fixed=TRUE)
        , str_c (`...1`, ")")
        , `...1`)
      , `...1` = str_trim (`...1`)
      ## replacements are applied in the order listed: council sizes are
      ## halved, smallest first
      , `...1` = str_replace_all (`...1`, c (
        "pred_value" = "{\\\\it margin}$^\\\\text{P}$"
        , " x " = " ("
        , "concejoSize_2011 =" = "district magnitude ="
        , " = 6" = " = 3"
        , " = 10" = " = 5"
        , " = 12" = " = 6"
        , " = 14" = " = 7"
        , " = 16" = " = 8"
        , " = 18" = " = 9"
        , " = 20" = " = 10"
        , " = 24" = " = 12"))))

# LaTeX export of the table: header rows, table note, row insertions, printing.
# add1-add3 are taken from the workspace as they stand.
Header1 <- "\\toprule & \\multicolumn{2}{c}{\\% {\\it turnout}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it positive}$^\\text{G}$} & & \\multicolumn{2}{c}{\\% {\\it first two}$^\\text{G}$} & & \\multicolumn{2}{c}{{\\it Golosov}$^\\text{G}$} \\\\ \\cmidrule{2-3} \\cmidrule{5-6} \\cmidrule{8-9} \\cmidrule{11-12} \n"
Header2 <- "\\multicolumn{1}{l}{\\textbf{Marginal effect by district magnitude}} & \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & & \\multicolumn{1}{c}{(5)} & \\multicolumn{1}{c}{(6)} & & \\multicolumn{1}{c}{(7)} & \\multicolumn{1}{c}{(8)} \\\\ \\midrule \n"
Bottom1 <- paste0 ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.75cm}~\\\\
  \\footnotesize \\textsc{ols} regression estimates.
  Each column reports a different specification.
  The outcome is always measured in the general election.
  The sample is limited to midterm elections.
  {\\it margin}$^{\\text{P}}$ is the difference between the \\% of votes of the leading and trailing parties in the primary election, including only parties that classified to the general election in the denominator.
  District magnitude is measured in 2011.
  Estimates report {\\it marginal} effects for different subsets of the sample.
  Standard errors clustered by municipality in parentheses.
  \\end{minipage}}\\\\")

# row positions (0 = before the first row) paired one-to-one with the LaTeX
# snippets inserted at those positions
addtorow <- list (
  pos = as.list (c (0, 0, 16, 16, 16, 17))
  , command = c (Header1, Header2, add1, add2, add3, Bottom1))

# empty spacer columns separate the four outcome pairs
print (
  xtable (
    bind_cols (
      tab_bymun_midt_size[,1:3], ""
      , tab_bymun_midt_size[,4:5], ""
      , tab_bymun_midt_size[,6:7], ""
      , tab_bymun_midt_size[,8:9])
    , align = c ("l", "l", rep ("c", 11))
    , digits = 0
    , caption = "Between-party closeness in the primary and general election outcomes: District magnitude in midterm elections"
    , label = "T:ClosenessMidtSize")
  , sanitize.text.function = identity ## cells already contain LaTeX markup
  , floating = TRUE
  , table.placement = "t"
  , caption.placement = "top"
  , latex.environments = "center"
  , size = "footnotesize"
  , include.colnames = FALSE
  , include.rownames = FALSE
  , hline.after = NULL ## all rules come from add.to.row
  , add.to.row = addtorow)




######################################
######################################
########## (4) RD analyses ###########
######################################
######################################

###############################
##### (4.1) Balance checks ----
###############################

### (4.1.1) Balance checks -> effect of ranking 1st (vs 2nd) or 2nd (vs 3rd) on pre-determined, party-specific variables ----

## (4.1.1.1) creating the dataset for the balance checks
# Builds a long dataset with one row per reference party x comparison x
# (sub)sample x outcome. Steps, in order:
#   1. stack two copies of byparty_full, one per comparison ("1 vs 2", "2 vs 3")
#   2. flag the two reference parties of each comparison and keep only them
#   3. replicate each row once per sample group and label the subsample
#   4. pivot the balance outcomes into (outcome_name, outcome_value) pairs
#   5. compute the running variable and filter to usable observations
byparty_rd_bal <- byparty_full %>% ## getting two runnings: 1 vs 2 or 2 vs 3
  mutate (comparison = "1 vs 2") %>% 
  bind_rows (byparty_full %>% mutate (comparison = "2 vs 3")) %>%
  mutate (
    
    ## identifying reference parties
    ## ref_rd_top = higher-ranked party of the pair, ref_rd_2nd = lower-ranked
    ## one, using the rank stored in rank_partido_classif_paso
    comparison = factor (comparison)
    , ref_rd_top = case_when (
      comparison=="1 vs 2" & rank_partido_classif_paso==1 ~ 1
      , comparison=="2 vs 3" & rank_partido_classif_paso==2 ~ 1
      , TRUE ~ 0)
    , ref_rd_2nd = case_when (
      comparison=="1 vs 2" & rank_partido_classif_paso==2 ~ 1
      , comparison=="2 vs 3" & rank_partido_classif_paso==3 ~ 1
      , TRUE ~ 0)
    
    ## creating outcome variables for the balance checks
    ## missing party indicators are recoded to 0
    , inc_dummy = if_else (is.na (inc_dummy), 0, inc_dummy)
    , pjoficial = if_else (is.na (pjoficial), 0, pjoficial)
    , ucr = if_else (is.na (ucr), 0, ucr)
    , presi_gob_party = case_when ( ## 2011-2023 president and governor always from same party
      pjoficial == 1 & year %in% c (2011:2015, 2021:2023) ~ 1
      , ucr == 1 & year %in% c (2017:2019) ~ 1
      , pjoficial == 0 | ucr == 0 ~ 0)) %>% ## evaluated only when neither branch above matched
  pivot_longer ( ## getting a separate row for each reference party
    cols = ref_rd_top:ref_rd_2nd
    , names_to = "ref_rd", values_to = "value") %>% 
  filter (value==1) %>% ## only keep reference parties
  mutate ( ## getting different reference samples
    ## the four constant columns exist only to be pivoted below, so that each
    ## row is replicated once per sample group
    ref_rd = factor (ref_rd)
    , sample_rd_all = 1
    , sample_rd_calendar = 1
    , sample_rd_conu = 1
    , sample_rd_size = 1) %>% 
  pivot_longer (
    cols = sample_rd_all:sample_rd_size
    , names_to = "sample_group", values_to="sample_ref") %>% 
  mutate (
    ## sample_ref (so far the constant 1) is overwritten with the subsample
    ## label; rows matching no branch get NA
    sample_ref = case_when (
      sample_group == "sample_rd_all" ~ "all observations"
      , sample_group == "sample_rd_calendar" & midterm == 0 ~ "concurrent elections"
      , sample_group == "sample_rd_calendar" & midterm == 1 ~ "midterm elections"
      , sample_group == "sample_rd_conu" ~ as.character (conurbano)
      , sample_group == "sample_rd_size" & muni_size=="small" ~ "small municipalities"
      , sample_group == "sample_rd_size" & muni_size=="large" ~ "large municipalities")
    , sample_group = factor (sample_group, levels=c (
      "sample_rd_all", "sample_rd_calendar", "sample_rd_conu", "sample_rd_size"))
    , sample_ref = factor (sample_ref, levels=c (
      "all observations", "concurrent elections", "midterm elections", "Conurbano", "Interior", "small municipalities", "large municipalities"))) %>% 
  pivot_longer ( ## getting a separate row for each outcome
    cols = c (
      presi_gob_party, inc_dummy, pjoficial, ucr
      , primary_comp, n_faccions, votos_faccion_pct_party, primary_golosov)
    , names_to = "outcome_name", values_to = "outcome_value") %>%
  mutate (
    outcome_name = factor (outcome_name, levels=c (
      "presi_gob_party", "inc_dummy", "pjoficial", "ucr"
      , "primary_comp", "n_faccions", "votos_faccion_pct_party", "primary_golosov"))
    ## human-readable outcome labels (with line breaks), same order as above
    , outcome_name_full = dplyr::recode (
      as.character (outcome_name)
      , presi_gob_party = "copartisan\npresi & governor"
      , inc_dummy = "party:\nincumbent"
      , pjoficial = "party: PJ"
      , ucr = "party: UCR"
      , primary_comp = "competitive\nprimary"
      , n_faccions = "N factions"
      , votos_faccion_pct_party = "% largest\nfaction"
      , primary_golosov = "Golosov\n(intra-party)")
    , outcome_name_full = factor (outcome_name_full, levels = c (
      "copartisan\npresi & governor", "party:\nincumbent", "party: PJ", "party: UCR", "competitive\nprimary", "N factions", "% largest\nfaction", "Golosov\n(intra-party)"))
    ## running variable: own percentage minus the percentage of the other
    ## party in the pair; NA for rows matching none of the four branches
    , running = case_when (
      comparison == "1 vs 2" & rank_partido_classif_paso == 1 ~ partido_1_pct_classif_paso - partido_2_pct_classif_paso
      , comparison == "1 vs 2" & rank_partido_classif_paso == 2 ~ partido_2_pct_classif_paso - partido_1_pct_classif_paso
      , comparison == "2 vs 3" & rank_partido_classif_paso == 2 ~ partido_2_pct_classif_paso - partido_3_pct_classif_paso
      , comparison == "2 vs 3" & rank_partido_classif_paso == 3 ~ partido_3_pct_classif_paso - partido_2_pct_classif_paso)
    , muni_year = str_c (municipio, year, sep= "_") %>% factor ()
    , value = NULL) %>% ## helper column from the reference-party pivot no longer needed
  filter (
    ## 2011 onwards, defined running variable, and enough parties in the
    ## primary for the comparison to exist
    year >= 2011 & !is.na (running)
    & ((comparison == "1 vs 2" & n_parties_paso >= 2)
       | (comparison == "2 vs 3" & n_parties_paso >= 3))) %>% 
  group_by (municipio, year, comparison, sample_group, sample_ref, outcome_name, outcome_name_full) %>% 
  mutate (n_obs_muni_year = n()) %>% ## number of rows per municipality-year within each cell
  ungroup ()
summary (byparty_rd_bal)


## (4.1.1.2) loop to get all the results
# For every (sub)sample x comparison x outcome cell, estimates the RD effect
# with rdrobust and the power of the test with rdpower, and appends one row
# of results to rd_balance.
#
# Cells with fewer than 12 observations on either side of the cutoff are
# skipped with `next`. (Previously the "do nothing" branch fell through to
# the storing step, which appended the estimates left over from the previous
# iteration under the current cell's labels, or failed outright if the very
# first cell was too small.)

# objects to store the values of interest
rd_balance <- NULL ## we need this to "store" the results

# values to loop over (computed once; may include NA, which yields an empty
# cell that is skipped below)
bal_samples <- unique (byparty_rd_bal$sample_ref)
bal_comparisons <- unique (byparty_rd_bal$comparison)
bal_outcomes <- unique (byparty_rd_bal$outcome_name_full)

# loop to get all the results
for (s in seq_along (bal_samples)){ ## (sub)sample
  
  for (c in seq_along (bal_comparisons)){ ## comparison group: 1v2 or 2v3
    
    for (o in seq_along (bal_outcomes)){ ## outcome
      
      data_tmp <- byparty_rd_bal %>% 
        filter (
          sample_ref == bal_samples[s]
          & comparison == bal_comparisons[c]
          & outcome_name_full == bal_outcomes[o]
        )
      
      ## fewer than 12 observations on either side of the cutoff -> skip cell
      if (
        sum (data_tmp$running < 0, na.rm=TRUE) < 12
        || sum (data_tmp$running > 0, na.rm=TRUE) < 12
      ) next
      
      
      ## RD analyses
      
      # no controls
      rd_main <- with (data_tmp, rdrobust (
        y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=1+1))
      
      # SD of outcome in the control group within (conventional) RD bandwidth
      sd_main_c <- sd (filter (data_tmp, running<0 & abs(running)<=rd_main$bws[1,1])$outcome_value, na.rm=TRUE)
      
      # power against three effect sizes: 1 SD, 1/2 SD, and the point estimate
      pow_data <- as.data.frame (data_tmp) %>% select (outcome_value, running)
      pow_main01 <- rdpower (
        data=pow_data
        , cutoff=0, alpha=0.05, level=95, p=1, q=1+1, covs=NULL, plot=FALSE
        , tau=sd_main_c, cluster=data_tmp$year)
      pow_main02 <- rdpower (
        data=pow_data
        , cutoff=0, alpha=0.05, level=95, p=1, q=1+1, covs=NULL, plot=FALSE
        , tau=sd_main_c/2, cluster=data_tmp$year)
      pow_main03 <- rdpower (
        data=pow_data
        , cutoff=0, alpha=0.05, level=95, p=1, q=1+1, covs=NULL, plot=FALSE
        , tau=rd_main$Estimate[1], cluster=data_tmp$year)
      
      
      ## storing everything (as character, one row per estimated cell)
      rd_balance <- rbind (
        rd_balance
        
        , c ( ## main results
          bal_samples[s] %>% as.character ()
          , bal_comparisons[c] %>% as.character ()
          , bal_outcomes[o] %>% as.character ()
          , "mserd", 1, extract_rd (rd_main), sd_main_c
          , pow_main01$power.rbc, pow_main02$power.rbc, pow_main03$power.rbc)
      )}}}
rd_balance <- as.data.frame (rd_balance)
colnames (rd_balance) <- c ("sample", "comparison", "outcome", "bwdselect", "poly", "est", "ci", "n", "bwd", "pval", "sd_c", "pow_sd", "pow_sd2", "pow_beta")
# Formatting the stored results (all character at this point) for tables and
# plots: factors with explicit ordering, numbers printed with fixed decimals,
# and confidence intervals rebuilt with two decimals on each bound.
rd_balance2 <- rd_balance %>%
  mutate (
    sample = factor (sample, levels=c (
      "all observations", "concurrent elections", "midterm elections"
      , "Conurbano", "Interior"
      , "small municipalities", "large municipalities"))
    , comparison = factor (comparison)
    , ref_party2 = case_when (
      comparison == "1 vs 2" ~ "first/second"
      , comparison == "2 vs 3" ~ "second/third")
    , outcome = factor (outcome, levels=c ( ## note the reversed order
      "Golosov\n(intra-party)", "% largest\nfaction", "N factions"
      , "competitive\nprimary", "party: UCR", "party: PJ"
      , "party:\nincumbent", "copartisan\npresi & governor"))
    , n = str_replace_all (n, "\\|", "$|$")
    , bwd = sprintf ("%.1f", as.numeric (bwd))
    , pval_num = as.numeric (pval) ## numeric copy, kept before pval is formatted
    , pval = sprintf ("%.2f", as.numeric (pval))
    , bwdselect = factor (bwdselect)
    , poly = as.numeric (poly)
    , across (
      .cols = c (est, sd_c, pow_sd, pow_sd2, pow_beta)
      , .fns = ~sprintf ("%.2f", round (as.numeric (.x), 2)))) %>%
  separate_wider_delim ( ## two decimals for all ci's
    cols=ci, delim=":", names=c ("ci_left", "ci_right")) %>%
  mutate (
    ci_left = str_remove_all (ci_left, "\\[")
    , ci_right = str_remove_all (ci_right, "\\]")
    , ci = str_c (
      "["
      , sprintf ("%.2f", as.numeric (ci_left))
      , " : "
      , sprintf ("%.2f", as.numeric (ci_right))
      , "]")) %>%
  arrange (bwdselect, poly, comparison, outcome)
summary (rd_balance2)

# one p-value plot per (sub)sample, in the order of levels (rd_balance2$sample);
# panels 2, 4 and 6 drop the x-axis title and the legend, all others keep both
plots_bal <- lapply (seq_along (levels (rd_balance2$sample)), function (idx) {
  
  sample_label <- levels (rd_balance2$sample)[idx]
  
  # elements common to every panel
  panel <- ggplot (
    filter (rd_balance2, sample == sample_label)
    , aes (x=pval_num, y=outcome, color=comparison)) +
    geom_vline (xintercept=c (0.05, 0.10), linetype=2, col=col_cutoff) +
    geom_point (size=size_bin * 2) +
    ylab ("") +
    labs (title=sample_label)
  
  # panels with x-axis title and legend
  if (!(idx %in% c (2, 4, 6))) {
    return (
      panel +
        scale_x_continuous (
          name=expression (italic ("p") * "–value")
          , limits=c (0, 1), breaks=seq (0, 1, by=0.1)) +
        scale_color_manual ("comparison:", values=c (col_gen, col_paso)) +
        theme (
          legend.position="bottom"
          , legend.box.margin=margin (-15, -9, -9, -9)))
  }
  
  # panels 2, 4, 6: no x-axis title, no legend
  panel +
    scale_x_continuous (
      name="", limits=c (0, 1), breaks=seq (0, 1, by = 0.1)) +
    scale_color_manual (values = c (col_gen, col_paso)) +
    theme (legend.position = "none")
  })

# combined figure: "all observations" on the left (vertically centered with
# an empty plot above and an empty cell below), the six subsample panels in
# a 2 x 3 grid on the right
(p_bal_all <- plot_grid (
  plot_grid ( ## left panel
    plotlist=list (
      ggplot () + theme_void ()
      , plots_bal[[1]]) ## all observations
    , nrow=3, ncol=1
    , rel_heights=c (1, 2, 1))
  , plot_grid ( ## right panel: first row 2-4-6, second row 3-5-7
    plotlist=plots_bal[c (2, 4, 6, 3, 5, 7)]
    , nrow=2, ncol=3)
  , rel_widths=c (1, 3) ## width ratio between left and right panels
  , ncol=2))




#########################
##### (4.2) RD plots ----
#########################

### (4.2.1) Lengthening the data to make the loop faster and easier ----

# outcome variables: the four current outcomes followed by their lagged
# counterparts (same names with a "_lag" suffix)
outcomes <- local ({
  current <- c (
    "winner_general", "votos_partido_pct_general"
    , "votos_partido_pct_delta", "votos_partido_log_delta")
  c (current, paste0 (current, "_lag"))
})

# sanity checks on the number of parties in the primary:
nrow (filter (byparty, year >= 2011, n_parties_paso == 1)) ## no single-party elections
nrow (filter (byparty, year >= 2011, n_parties_paso == 2)) / 2 ## 60 elections with just two parties in the primary -> no 2 vs 3 margin. We'll discard these at the end

# creating the dataset proper
# Builds a long dataset with one row per reference party x comparison x
# denominator x (sub)sample x outcome (current and lagged). Steps, in order:
#   1. stack four copies of byparty_full: {1 vs 2, 2 vs 3} x {classif, all}
#   2. compute the delta outcomes and flag the reference parties
#   3. keep reference parties only; replicate rows once per sample group
#   4. pivot outcomes long and compute their lag within municipality
#   5. compute the running variable and filter to usable observations
#
# The lag in step 4 is computed within groups that include `parties_denom`.
# Without it each municipality-period holds two rows per group (one per
# denominator), so lag (order_by=period) returns the value of the *same*
# period under the other denominator (for "all" rows), or the previous
# period's value under the wrong denominator (for "classif" rows).
byparty_rd <- byparty_full %>% ## getting two runnings: 1 vs 2 or 2 vs 3
  mutate (comparison = "1 vs 2", parties_denom = "classif") %>% 
  bind_rows (byparty_full %>% mutate (comparison = "2 vs 3", parties_denom = "classif")) %>% 
  bind_rows (byparty_full %>% mutate (comparison = "1 vs 2", parties_denom = "all")) %>% ## all parties that participated in the primary included in the denominator, even if they didn't pass the 1.5% threshold
  bind_rows (byparty_full %>% mutate (comparison = "2 vs 3", parties_denom = "all")) %>% 
  mutate (
    period = (year-1983)/2+1 ## to get 1, 2, etc
    , votos_partido_pct_delta = case_when (
      parties_denom == "classif" ~ votos_partido_pct_general - votos_partido_pct_classif_paso
      , parties_denom == "all" ~ votos_partido_pct_general - votos_partido_pct_paso)
    , votos_partido_log_delta = log (votos_partido_general/votos_partido_paso)
    , winner_general = winner_general*100 ## to interpret the estimates as percentage point changes
    , comparison = factor (comparison)
    
    ## identifying reference parties
    ## ref_rd_top / ref_rd_2nd = higher- / lower-ranked party of the pair,
    ## with the rank variable matching the denominator
    , ref_rd_top = case_when (
      comparison=="1 vs 2" & parties_denom == "classif" & rank_partido_classif_paso==1 ~ 1
      , comparison=="2 vs 3" & parties_denom == "classif" & rank_partido_classif_paso==2 ~ 1
      , comparison=="1 vs 2" & parties_denom == "all" & rank_partido_paso==1 ~ 1
      , comparison=="2 vs 3" & parties_denom == "all" & rank_partido_paso==2 ~ 1
      , TRUE ~ 0)
    , ref_rd_2nd = case_when (
      comparison=="1 vs 2" & parties_denom == "classif" & rank_partido_classif_paso==2 ~ 1
      , comparison=="2 vs 3" & parties_denom == "classif" & rank_partido_classif_paso==3 ~ 1
      , comparison=="1 vs 2" & parties_denom == "all" & rank_partido_paso==2 ~ 1
      , comparison=="2 vs 3" & parties_denom == "all" & rank_partido_paso==3 ~ 1
      , TRUE ~ 0)
    , ref_rd_inc = ifelse (!is.na (inc_known) & inc_known==1 & inc_dummy == 1, 1, 0)
    , ref_rd_pj = ifelse (pjoficial == 1, 1, 0)
    , ref_rd_ucr = ifelse (ucr==1, 1, 0)) %>% 
  pivot_longer ( ## getting a separate row for each reference party
    cols = ref_rd_top:ref_rd_ucr
    , names_to = "ref_rd", values_to = "value") %>% 
  filter (value==1) %>% ## only keep reference parties
  mutate (
    ref_rd = factor (ref_rd)
    , ref_rd_party = case_when (
      ref_rd %in% c ("ref_rd_top", "ref_rd_2nd") ~ "all parties"
      , ref_rd == "ref_rd_inc" ~ "incumbent party"
      , ref_rd == "ref_rd_pj" ~ "PJ"
      , ref_rd == "ref_rd_ucr" ~ "UCR")
    , ref_rd_party = factor(ref_rd_party, levels = c(
      "all parties", "incumbent party", "PJ", "UCR"))
    , value = NULL
    
    ## getting different reference samples
    ## (constant columns that exist only to be pivoted below, so that each
    ## row is replicated once per sample group)
    , sample_rd_all = 1
    , sample_rd_calendar = 1
    , sample_rd_conu = 1
    , sample_rd_size = 1) %>% 
  pivot_longer (
    cols = sample_rd_all:sample_rd_size
    , names_to = "sample_group", values_to="sample_ref") %>% 
  mutate (
    parties_denom = factor (parties_denom, levels=c ("classif", "all"))
    ## sample_ref (so far the constant 1) is overwritten with the subsample
    ## label; rows matching no branch get NA
    , sample_ref = case_when (
      sample_group == "sample_rd_all" ~ "all observations"
      , sample_group == "sample_rd_calendar" & midterm == 0 ~ "concurrent elections"
      , sample_group == "sample_rd_calendar" & midterm == 1 ~ "midterm elections"
      , sample_group == "sample_rd_conu" ~ as.character (conurbano)
      , sample_group == "sample_rd_size" & muni_size=="small" ~ "small municipalities"
      , sample_group == "sample_rd_size" & muni_size=="large" ~ "large municipalities")
    , sample_group = factor (sample_group, levels=c (
      "sample_rd_all", "sample_rd_calendar", "sample_rd_conu", "sample_rd_size"))
    , sample_ref = factor (sample_ref, levels=c (
      "all observations", "concurrent elections", "midterm elections", "Conurbano", "Interior", "small municipalities", "large municipalities"))) %>% 
  pivot_longer ( ## getting a separate row for each outcome
    cols = outcomes[!grepl ("_lag", outcomes)]
    , names_to = "outcome_name", values_to = "outcome_value") %>% ## getting lagged version of the outcome variables
  group_by (comparison, parties_denom, sample_group, ref_rd, municipio, outcome_name) %>% ## "parties_denom" is needed so that each group has a single row per period (see header). Also notice the "sample_group" instead of "sample_ref": if we use the latter, we split municipalities between concurrent and midterm years, and get lots of NA's for lagged delta values in 2013 
  mutate (outcome_value_lag = lag (outcome_value, n=1, order_by=period)) %>%
  ungroup () %>% ## pivoting longer to get leads and lags
  pivot_longer (
    cols = outcome_value:outcome_value_lag
    , names_to = "lag", values_to = "outcome_value") %>% 
  mutate (
    ## missing party indicators are recoded to 0
    pjoficial = if_else (is.na (pjoficial), 0, pjoficial)
    , ucr = if_else (is.na (ucr), 0, ucr)
    , inc_dummy = if_else (is.na (inc_dummy), 0, inc_dummy)
    , presi_gob_party = case_when ( ## 2011-2023 president and governor always from same party
      pjoficial == 1 & year %in% c (2011:2015, 2021:2023) ~ 1
      , ucr == 1 & year %in% c (2017:2019) ~ 1
      , pjoficial == 0 | ucr == 0 ~ 0)
    
    ## modifying other variables
    ## lagged rows get the "_lag" suffix so that they match `outcomes`
    , outcome_name = ifelse (
      grepl ("_lag", lag), str_c (outcome_name, "_lag", sep=""), outcome_name)
    , outcome_name = factor (outcome_name, levels=outcomes)
    , toptwo = case_when (
      comparison == "1 vs 2" & parties_denom == "classif" & rank_partido_classif_paso %in% 1:2 ~ 1
      , comparison == "2 vs 3" & parties_denom == "classif" & rank_partido_classif_paso %in% 2:3 ~ 1
      , comparison == "1 vs 2" & parties_denom == "all" & rank_partido_paso %in% 1:2 ~ 1
      , comparison == "2 vs 3" & parties_denom == "all" & rank_partido_paso %in% 2:3 ~ 1
      , TRUE ~ 0)
    ## running variable: own percentage minus the percentage of the other
    ## party in the pair; NA for rows matching none of the branches
    , running = case_when (
      comparison == "1 vs 2" & parties_denom == "classif" & rank_partido_classif_paso == 1 ~ partido_1_pct_classif_paso - partido_2_pct_classif_paso
      , comparison == "1 vs 2" & parties_denom == "classif" & rank_partido_classif_paso == 2 ~ partido_2_pct_classif_paso - partido_1_pct_classif_paso
      , comparison == "2 vs 3" & parties_denom == "classif" & rank_partido_classif_paso == 2 ~ partido_2_pct_classif_paso - partido_3_pct_classif_paso
      , comparison == "2 vs 3" & parties_denom == "classif" & rank_partido_classif_paso == 3 ~ partido_3_pct_classif_paso - partido_2_pct_classif_paso
      , comparison == "1 vs 2" & parties_denom == "all" & rank_partido_paso == 1 ~ partido_1_pct_paso - partido_2_pct_paso
      , comparison == "1 vs 2" & parties_denom == "all" & rank_partido_paso == 2 ~ partido_2_pct_paso - partido_1_pct_paso
      , comparison == "2 vs 3" & parties_denom == "all" & rank_partido_paso == 2 ~ partido_2_pct_paso - partido_3_pct_paso
      , comparison == "2 vs 3" & parties_denom == "all" & rank_partido_paso == 3 ~ partido_3_pct_paso - partido_2_pct_paso)
    , muni_year = str_c (municipio, year, sep="_") %>% factor ()
    , lag = NULL) %>% 
  filter (
    !(grepl ("_lag", outcome_name) & ref_rd_party=="all parties") ## lagging for the first or second in the PASO automatically lags for the first or second in the previous election, and thus the sample ends up unbalanced by construction. In any case, note that when using "all parties", only candidate- or party-specific characteristics may end up unbalanced
    & year >= 2011 & !is.na (running) #& toptwo == 1
    & ((comparison == "1 vs 2" & n_parties_paso >= 2)
       | (comparison == "2 vs 3" & n_parties_paso >= 3))
    & !is.na (outcome_value) ## mostly delta lags in 2011 -> there were no primaries in 2009
    ) %>% 
  group_by (municipio, year, comparison, parties_denom, ref_rd_party, sample_group, sample_ref, outcome_name) %>% 
  mutate (n_obs_muni_year = n()) %>% ## number of rows per municipality-year within each cell
  ungroup ()
summary (byparty_rd)

with (byparty_rd, table (ref_rd_party, n_obs_muni_year, useNA="ifany")) ## incumbent, PJ or UCR -> always 1. "all parties" -> almost always 2 obs per muni-year. Only exception is for 104 cases of votos_partido_log_delta, for which we have an NA if that party did not participate in the general election. We don't care much about these cases:
byparty_rd %>% filter (ref_rd_party=="all parties" & n_obs_muni_year == 1) %>% summary ()



### (4.2.2) Running the loop to get the underlying data for all the plots ----
# For every (sub)sample x comparison x reference party x outcome cell (with
# the "classif" denominator only), draws an RD plot with rdplot and stores
# the plotted data -- binned means plus the left and right polynomial fits --
# in plot_results.
#
# Changes with respect to the earlier version of this loop:
#  * a failing rdplot call is now actually skipped: the error handler returns
#    NULL, which is checked afterwards (assigning a flag inside the handler
#    only changed a variable local to the handler, so the call was repeated
#    unprotected and aborted the loop);
#  * rdplot is run once per cell instead of twice;
#  * the fitted lines are read from their own plot layers instead of
#    repeating the binned means three times.
plot_results <- NULL ## we need this to store the results

# values to loop over (computed once)
plot_samples <- unique (byparty_rd$sample_ref)
plot_comparisons <- unique (byparty_rd$comparison)
plot_parties <- unique (byparty_rd$ref_rd_party)
plot_outcomes <- unique (byparty_rd$outcome_name)

for (s in seq_along (plot_samples)){ ## (sub)sample
  
  for (c in seq_along (plot_comparisons)){ ## comparison group: 1v2 or 2v3
    
    for (p in seq_along (plot_parties)){ ## reference party
      
      for (o in seq_along (plot_outcomes)){ ## outcome
        
        data_plot <- byparty_rd %>% filter (
          parties_denom == "classif" ## only report these for the plots
          & sample_ref == plot_samples[s]
          & comparison == plot_comparisons[c]
          & ref_rd_party == plot_parties[p]
          & outcome_name == plot_outcomes[o])
        
        ## nothing to plot with fewer than two observations
        if (nrow (data_plot) < 2) next
        
        ## drawing the plots
        
        # QSMV: quantile-spaced, mimicking variability
        # => better reflects the actual distribution of the data; MV recommended by Korting et alii (2023)
        rdplot_qsmv <- tryCatch (
          with (data_plot, rdplot (
            y=outcome_value, x=running, p=3, binselect="qsmv", kernel="uniform"
            , scale=1, x.lim=c (-50, 50), hide=TRUE))
          , error = function (e) {
            cat ("Error occurred for iteration", s, c, p, o, ": ", conditionMessage(e), "\n")
            NULL ## signals failure to the code below
          })
        
        ## skip cells for which the plot could not be drawn
        if (is.null (rdplot_qsmv)) next
        
        ## exporting the data
        # NOTE(review): assumes the ggplot object returned by rdplot has the
        # binned means as layer 1 and the left / right polynomial fits as
        # layers 2 / 3 -- confirm against the installed rdrobust version
        plot_layers <- ggplot_build (rdplot_qsmv$rdplot)$data
        tmp_values <- bind_rows (
          plot_layers[[1]] %>% select (x, y) %>% mutate (type="bin")
          , plot_layers[[2]] %>% select (x, y) %>% mutate (type="ll")
          , plot_layers[[3]] %>% select (x, y) %>% mutate (type="rl")) %>%
          mutate (
            sample_ref = plot_samples[s] %>% as.character ()
            , comparison = plot_comparisons[c] %>% as.character ()
            , ref_rd_party = plot_parties[p] %>% as.character ()
            , outcome_name = plot_outcomes[o] %>% as.character ()
            , bin_select="qsmv", poly=3)
        
        plot_results <- bind_rows (plot_results, tmp_values) 
      }}}}

## building the definitive dataset
## Turns `plot_results` into the dataset used to draw the RD plots: the identifiers become
## factors, readable labels are added for outcomes and comparisons and, within each plot,
## the jump between the two fitted lines at the cutoff is stored (rd_estim / rd_estim_text).
plot_results2 <- plot_results %>% 
  mutate (
    type = factor (type, levels=c ("bin", "ll", "rl"))
    , bin_select = factor (bin_select)
    , sample_ref = factor (sample_ref, levels=c (
      "all observations", "concurrent elections", "midterm elections", "Conurbano", "Interior", "small municipalities", "large municipalities"))
    , ref_rd_party = factor (ref_rd_party, levels=c (
      "all parties", "incumbent party", "PJ", "UCR"))
    , outcome_name = factor (outcome_name, levels=outcomes)
    , out_timing = ifelse (
      grepl ("_lag", outcome_name), "lag", "current") %>% factor ()
    , out_var = dplyr::recode (
      as.character (outcome_name)
      , winner_general = "victory\n(0/100)"
      , winner_general_lag = "victory\n(0/100)"
      , votos_partido_pct_general = "vote percentage\n(0:100)"
      , votos_partido_pct_general_lag = "vote percentage\n(0:100)"
      , votos_partido_pct_delta = "\u0394 vote percentage\n(-100:100)"
      , votos_partido_pct_delta_lag = "\u0394 vote percentage\n(-100:100)"
      , votos_partido_log_delta = "log ([votes general] / [votes primary])"
      , votos_partido_log_delta_lag = "log ([votes general] / [votes primary])")
    , out_var = factor (out_var, levels = c (
      "victory\n(0/100)", "vote percentage\n(0:100)", "\u0394 vote percentage\n(-100:100)", "log ([votes general] / [votes primary])"))
    , comparison_full = case_when ( ## subsamples other than the full one are appended to the comparison label
      sample_ref == "all observations" ~ comparison
      , sample_ref != "all observations" ~ str_c (comparison, sample_ref, sep=": "))
    , comparison_full = factor (comparison_full)
    , comparison = factor (comparison)) %>% 
  group_by (sample_ref, comparison, ref_rd_party, outcome_name, bin_select, poly) %>% ## to identify each plot uniquely
  mutate ( ## NB: the result is left grouped by plot
    ## The point closest to the cutoff is searched only among the points of the fitted line on
    ## that side ("ll" below, "rl" above). Searching among all the rows of the plot would leave
    ## the estimate undefined whenever a bin lies closer to the cutoff than the last point of the line.
    ## max_na() / min_na() return NA (rather than +/-Inf plus a warning) when there are no such points.
    min_x_left = max_na (x[type=="ll" & x<0]) ## despite its name, the largest x below the cutoff
    , y_below = max_na (ifelse (type=="ll" & x==min_x_left, y, NA))
    , min_x_right = min_na (x[type=="rl" & x>0])
    , y_above = max_na (ifelse (type=="rl" & x==min_x_right, y, NA))
    , rd_estim = y_above - y_below ## jump at the cutoff implied by the two fitted lines
    , rd_estim_text = str_c ("\u03b2 = ", sprintf ("%.1f", round (rd_estim, 1)), "pp.", sep=""))
summary (plot_results2)



### (4.2.3) Drawing the plots ----

## (4.2.3.1) Current effects: primary -> general ----

## The three figures below are the same binned scatterplot (one row per outcome, one column
## per comparison) and differ only in the (sub)samples they show, so a single helper draws them.
## `samples` holds the value(s) of sample_ref to keep. Only the bins for current outcomes
## (victory and vote percentage) of the "all parties" comparison are plotted.
draw_rd_bins <- function (samples) {
  bins_tmp <- plot_results2 %>% 
    filter (
      sample_ref %in% samples
      , ref_rd_party == "all parties"
      , out_timing == "current"
      , out_var %in% c ("victory\n(0/100)", "vote percentage\n(0:100)")
      , type == "bin"
      , bin_select == "qsmv"
      , poly == 3)
  ggplot (bins_tmp, aes (x=x, y=y)) +
    geom_vline (xintercept=0, col=col_concurrent) + ## cutoff
    geom_point (size=size_bin, alpha=alpha_bin, col=col_bin) +
    facet_grid (out_var ~ comparison_full) +
    xlim (-50, 50) + ylim (0, 100) +
    xlab (expression (margin~of~victory[primary])) +
    ylab (expression ("%"[general])) +
    theme (axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0))) }


## full sample
(rd_all_full <- draw_rd_bins ("all observations"))


## split sample (I): midterm vs concurrent
(rd_all_midt <- draw_rd_bins (c ("concurrent elections", "midterm elections")))


## split sample (II): large vs. small municipalities
(rd_all_size <- draw_rd_bins (c ("large municipalities", "small municipalities")))





############################
##### (4.3) RD analyses ----
############################

### (4.3.1) loop to get the main estimates

## For every combination of (sub)sample, comparison, denominator, reference party, outcome,
## bandwidth selector and polynomial order, the loop fits the RD model without and with controls
## and appends two rows (one per model) to `rd_results`: the identifiers, the values returned by
## extract_rd (), the SD of the outcome among control observations within the bandwidth, and the
## power against three effect sizes (1 SD, 1/2 SD and the point estimate).
## Combinations with fewer than 12 observations on either side of the cutoff are skipped
## altogether, i.e. no row is stored for them.

# objects to store the values of interest
rd_results <- NULL ## we need this to "store" the results

# values to loop over (looked up once instead of on every iteration)
lvl_sample <- unique (byparty_rd$sample_ref)
lvl_comparison <- unique (byparty_rd$comparison)
lvl_denom <- unique (byparty_rd$parties_denom)
lvl_party <- unique (byparty_rd$ref_rd_party)
lvl_outcome <- unique (byparty_rd$outcome_name)

# loop to get all the results
for (s in seq_along (lvl_sample)){ ## (sub)sample
  
  for (c in seq_along (lvl_comparison)){ ## comparison group: 1v2 or 2v3
    
    for (d in seq_along (lvl_denom)){ ## votes in the denominator when calculating values in the primary: all parties that participated vs. those that qualified for the general election
      
      for (p in seq_along (lvl_party)){ ## reference party
        
        for (o in seq_along (lvl_outcome)){ ## outcome
          
          ## subsample of interest (it does not depend on the bandwidth selector or the polynomial)
          data_tmp <- byparty_rd %>% 
            filter (
              sample_ref == lvl_sample[s]
              & comparison == lvl_comparison[c]
              & parties_denom == lvl_denom[d]
              & ref_rd_party == lvl_party[p]
              & outcome_name == lvl_outcome[o])
          
          ## fewer than 12 observations on either side of the cutoff: nothing is estimated or stored
          if (sum (data_tmp$running < 0, na.rm=TRUE) < 12
              | sum (data_tmp$running > 0, na.rm=TRUE) < 12) next
          
          ## identifiers shared by all the rows of this combination
          id_tmp <- c (
            as.character (lvl_sample[s])
            , as.character (lvl_comparison[c])
            , as.character (lvl_denom[d])
            , as.character (lvl_party[p]))
          out_tmp <- as.character (lvl_outcome[o])
          
          ## controls, as a matrix with one column per covariate. Adding the columns with `+`
          ## would collapse them into a single covariate equal to their element-wise sum.
          covs_tmp <- with (data_tmp, cbind (
            concejoSize, turnout_pct_paso, midterm ## election-level
            , primary_comp, votos_faccion_pct_party, primary_golosov, n_faccions
            , pjoficial, ucr, inc_dummy, presi_gob_party))
          
          for (b in c ("mserd", "cerrd")) { ## selecting the bandwidth selection procedure
            
            for (l in 1:2) {  ## selecting the polynomial degree
              
              # no controls
              rd_main <- with (data_tmp, rdrobust (
                y=outcome_value, x=running, bwselect=b, covs=NULL, cluster=year, p=l, q=l+1))
              
              # w/controls
              rd_cont <- with (data_tmp, rdrobust (
                y=outcome_value, x=running, bwselect=b, covs=covs_tmp
                , cluster=year, p=l, q=l+1))
              
              # SD of outcome in the control group within (conventional) RD bandwidth
              sd_main_c <- sd (filter (data_tmp, running<0 & abs(running)<=rd_main$bws[1,1])$outcome_value, na.rm=TRUE)
              sd_cont_c <- sd (filter (data_tmp, running<0 & abs(running)<=rd_cont$bws[1,1])$outcome_value, na.rm=TRUE)
              
              # power against (1) one SD, (2) half a SD, and (3) the point estimate
              pow_main01 <- rdpower (
                data=as.data.frame (data_tmp) %>% select (outcome_value, running)
                , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
                , tau=sd_main_c, cluster=data_tmp$year)
              pow_main02 <- rdpower (
                data=as.data.frame (data_tmp) %>% select (outcome_value, running)
                , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
                , tau=sd_main_c/2, cluster=data_tmp$year)
              pow_main03 <- rdpower (
                data=as.data.frame (data_tmp) %>% select (outcome_value, running)
                , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
                , tau=rd_main$Estimate[1], cluster=data_tmp$year)
              
              ## NOTE(review): the power calculations for the model with controls do not pass
              ## the covariates to rdpower (covs=NULL) -- confirm that this is intended
              pow_cont01 <- rdpower (
                data=as.data.frame (data_tmp) %>% select (outcome_value, running)
                , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
                , tau=sd_cont_c, cluster=data_tmp$year)
              pow_cont02 <- rdpower (
                data=as.data.frame (data_tmp) %>% select (outcome_value, running)
                , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
                , tau=sd_cont_c/2, cluster=data_tmp$year)
              pow_cont03 <- rdpower (
                data=as.data.frame (data_tmp) %>% select (outcome_value, running)
                , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
                , tau=rd_cont$Estimate[1], cluster=data_tmp$year)
              
              ## storing everything (only for models that were actually estimated)
              rd_results <- rbind (
                rd_results
                
                , c ( ## main results
                  id_tmp, "none", out_tmp
                  , b, l, extract_rd (rd_main), sd_main_c
                  , pow_main01$power.rbc, pow_main02$power.rbc, pow_main03$power.rbc)
                
                , c ( ## w/controls: the SD is the one computed within the bandwidth of this model
                  id_tmp, "all controls", out_tmp
                  , b, l, extract_rd (rd_cont), sd_cont_c
                  , pow_cont01$power.rbc, pow_cont02$power.rbc, pow_cont03$power.rbc))
            }}
        }}}}}
## naming the columns, in the order in which the loop stores the elements of each row
colnames (rd_results) <- c (
  "sample", "comparison", "parties_denom", "ref_party", "covs", "outcome", "bwdselect", "poly"
  , "est", "ci", "n", "bwd", "pval", "sd_c", "pow_sd", "pow_sd2", "pow_beta")

## tidy version of the results: identifiers as factors, LaTeX-ready labels, and numbers
## formatted as text with a fixed number of decimals
rd_results2 <- as.data.frame (rd_results) %>% 
  mutate ( ## identifiers and labels
    sample = factor (sample, levels=c (
      "all observations", "concurrent elections", "midterm elections", "Conurbano", "Interior", "small municipalities", "large municipalities"))
    , comparison = factor (comparison)
    , parties_denom = factor (parties_denom)
    , ref_party = factor (ref_party, levels=c (
      "all parties", "incumbent party", "PJ", "UCR"))
    , covs = factor (covs, levels=c ("none", "all controls"))
    , bwdselect = factor (bwdselect, levels=c ("mserd", "cerrd"))
    , ref_party2 = case_when (
      comparison == "1 vs 2" & ref_party == "all parties" ~ "first/second"
      , comparison == "2 vs 3" & ref_party == "all parties" ~ "second/third"
      , ref_party == "incumbent party" ~ "incumbent"
      , ref_party == "PJ" ~ "\\textsc{pj}"
      , ref_party == "UCR" ~ "\\textsc{ucr}")
    , outcome_full = case_when ( ## built before `outcome` becomes a factor
      outcome == "winner_general" ~ "{\\it winner}$^{\\text{G}}$ (0/100)"
      , outcome == "votos_partido_pct_general" ~ "\\% {\\it vote}$^{\\text{G}}$ (0:100)"
      , TRUE ~ as.character (outcome))
    , outcome = factor (outcome, levels=outcomes)) %>% 
  mutate ( ## numbers
    n = str_replace_all (n, "\\|", "$|$")
    , bwd = sprintf ("%.1f", as.numeric (bwd))
    , pval = sprintf ("%.2f", as.numeric (pval))
    , poly = as.numeric (poly)
    , across (
      c (est, sd_c, pow_sd, pow_sd2, pow_beta)
      , ~ sprintf ("%.2f", round (as.numeric (.x), 2)))) %>% 
  separate_wider_delim ( ## two decimals for all ci's
    cols=ci, delim=":", names=c ("ci_left", "ci_right")) %>% 
  mutate (
    ci_left = str_remove_all (ci_left, "\\[")
    , ci_right = str_remove_all (ci_right, "\\]")
    , ci = str_c (
      "[", sprintf ("%.2f", as.numeric (ci_left))
      , " : ", sprintf ("%.2f", as.numeric (ci_right)), "]")) %>% 
  arrange (bwdselect, poly, comparison, covs, outcome, ref_party)
summary (rd_results2)



### (4.3.2) Heterogeneous effects: 2 vs 3 conditional on performance of first-placed party ----

# how many "2 vs 3" observations are there when the first-placed party leads the second by
# less than 2 points (the narrowest window used below)? The range of that margin is shown too.
# (the count is halved -- presumably because each race contributes two rows; TODO confirm)
byparty_rd %>% 
  filter (
    comparison == "2 vs 3"
    , ref_rd_party == "all parties"
    , parties_denom == "classif"
    , outcome_name == "winner_general") %>% 
  mutate (margin_tmp = partido_1_pct_classif_paso - partido_2_pct_classif_paso) %>% 
  group_by (sample_ref) %>% 
  summarise (
    n = sum (margin_tmp < 2) / 2
    , min_12 = min (margin_tmp)
    , max_12 = max (margin_tmp))
## (maximum value will be 72)

# what about vote share of the largest party?
# (min_12 / max_12 here are the extremes of the first-placed party's vote share)
byparty_rd %>% 
  filter (
    comparison == "2 vs 3"
    , ref_rd_party == "all parties"
    , parties_denom == "classif"
    , outcome_name == "winner_general") %>% 
  group_by (sample_ref) %>% 
  summarise (
    n35 = sum (partido_1_pct_classif_paso < 35) / 2
    , min_12 = min (partido_1_pct_classif_paso)
    , max_12 = max (partido_1_pct_classif_paso))
## the grid below runs from 35 to 85

# alternative bandwidth values + objects for storage
(bwds_margin12 <- seq (from=2, to=72, by=1)) ## caps on the margin between the first two parties
(bwds_first <- seq (from=35, to=85, by=1)) ## caps on the vote share of the first-placed party
vshare1_bwds <- NULL
margs_bwds <- NULL


## (4.3.2.1) loop: margin of first-placed party
## For each cap in bwds_margin12, reference sample and outcome, the "2 vs 3" RD model is
## re-estimated using only the observations in which the first-placed party led the second
## by no more than the cap. Each estimate adds one row to `margs_bwds`.
for (m in seq_along (bwds_margin12)){ ## position of the cap in bwds_margin12
  
  for (s in c ("all observations", "concurrent elections", "midterm elections")){ ## reference sample
    
    for (o in c ("winner_general", "votos_partido_pct_general")){ ## outcome
      
      ## selecting the subsample of interest
      data_tmp <- byparty_rd %>% 
        filter (
          sample_ref == s
          , outcome_name == o
          , comparison == "2 vs 3"
          , ref_rd_party == "all parties"
          , parties_denom == "classif"
          , (partido_1_pct_classif_paso - partido_2_pct_classif_paso) <= bwds_margin12[m])
      
      ## getting the RD estimates
      rd_tmp <- with (data_tmp, rdrobust (
        y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=2))
      
      ## storing everything: identifiers, cap, point estimate, robust CI and effective N on each side
      margs_bwds <- rbind (
        margs_bwds
        , c (
          as.character (unique (data_tmp$comparison))
          , s
          , o
          , bwds_margin12[m]
          , rd_tmp$Estimate[1]
          , rd_tmp$ci[3,]
          , rd_tmp$N_h))
    }
  }
}
## naming the columns, in the order in which the loop stores the elements of each row
colnames (margs_bwds) <- c (
  "comparison", "sample_ref", "outcome", "margin12"
  , "est", "ci_low", "ci_high", "n_left", "n_right")

## tidy version of the estimates, with the corresponding full-sample results attached
## (no controls, "mserd" bandwidth, linear polynomial) as tau_rd / ci_left / ci_right
margs_bwds2 <- as.data.frame (margs_bwds) %>% 
  mutate (
    across (c (margin12, est, ci_low, ci_high, n_left, n_right), as.numeric)
    , comparison = factor (comparison)
    , sample_ref = factor (sample_ref)
    , ref_party = factor ("all parties")
    , outcome = factor (outcome, levels=c (
      "winner_general", "votos_partido_pct_general"))
    , out_var = factor ( ## labels for the facets
      as.character (outcome)
      , levels = c ("winner_general", "votos_partido_pct_general")
      , labels = c ("outcome:\nvictory (0/100)", "outcome:\nvote % (0:100)"))) %>% 
  left_join ( ## adding values from main results
    rd_results2 %>% 
      filter (
        parties_denom == "classif"
        , comparison == "2 vs 3"
        , ref_party == "all parties"
        , covs == "none"
        , bwdselect == "mserd"
        , poly == 1) %>% 
      select (comparison, sample, outcome, tau_rd = est, ci_left, ci_right)
    , by = c ("comparison", "sample_ref"="sample", "outcome")) %>% 
  mutate (across (c (tau_rd, ci_left, ci_right), as.numeric)) ## stored as text in rd_results2
summary (margs_bwds2)


## (4.3.2.2) Drawing the plots

# conditioning on margin 1 vs 2
# Both figures plot, for the full sample, the RD estimate (and 95% CI) obtained with each cap on
# the margin between the first two parties, together with the main estimate (solid line) and its
# CI (dashed lines). They only differ in the outcome and in the y-axis, so one helper draws both.
#   outcome_id: value of `outcome` to plot
#   y_limits, y_breaks: limits and breaks of the y-axis
draw_cond_margin12 <- function (outcome_id, y_limits, y_breaks) {
  est_tmp <- margs_bwds2 %>% 
    filter (outcome == outcome_id & sample_ref == "all observations")
  ggplot (est_tmp, aes (x=margin12, y=est)) +
    geom_hline (yintercept=0, col=col_cutoff) +
    ## the reference values are repeated on every row: unique () draws each line only once
    geom_hline (yintercept=unique (est_tmp$tau_rd), col=col_gen) +
    geom_hline (yintercept=unique (est_tmp$ci_left), col=col_gen, linetype=2) +
    geom_hline (yintercept=unique (est_tmp$ci_right), col=col_gen, linetype=2) +
    geom_linerange (aes (ymin=ci_low, ymax=ci_high), size=size_bin/2, col=col_paso) +
    geom_point (size=size_bin, alpha=alpha_bin, col=col_paso) +
    ## "at most": each estimate uses the observations whose margin is <= the value on the x-axis
    scale_x_continuous ("margin of first-placed party in primary: at most (%)", limits=c (0, max (bwds_margin12)), breaks=seq (0, 70, by=10)) +
    scale_y_continuous ("RD point estimate and 95% CI", limits=y_limits, breaks=y_breaks) +
    theme (legend.position="bottom", legend.title=element_blank (), legend.box.margin=margin (-15,-9,-9,-9)) +
    theme (axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0))) }

(cond_margin12_win <- draw_cond_margin12 (
  "winner_general", y_limits=c (-5, 120), y_breaks=seq (-20, 120, by=20)))

(cond_margin12_sh <- draw_cond_margin12 (
  "votos_partido_pct_general", y_limits=c (-8, 23), y_breaks=seq (-5, 25, by=5)))



### (4.3.3) Robustness: Alternative bandwidths ----

## (4.3.3.1) Getting the estimates

# alternative bandwidth values + objects for storage
(bwds <- seq (5, 35, by=1))
alt_bwds <- NULL

# values to loop over. The lagged outcomes are dropped here, once, so that the same vector
# selects the data AND labels the stored rows (indexing the full list of outcomes with `o`
# would mislabel the rows whenever a lagged outcome precedes a current one).
alt_comparisons <- unique (byparty_rd$comparison)
alt_parties <- unique (byparty_rd$ref_rd_party)
alt_outcomes <- unique (byparty_rd$outcome_name)
alt_outcomes <- alt_outcomes[!grepl ("_lag", alt_outcomes)]

# doing the loop: for each comparison, reference party and outcome (full sample only), the RD
# model is re-estimated with every bandwidth in `bwds`, plus the optimal one, half of it and
# twice it. Each estimate adds one row to `alt_bwds`.
for (c in seq_along (alt_comparisons)){ ## comparison group: 1v2 or 2v3
  
  for (p in seq_along (alt_parties)){ ## reference party
    
    for (o in seq_along (alt_outcomes)){ ## outcome, excluding lags
      
      ## selecting the subsample of interest
      data_tmp <- byparty_rd %>% 
        filter (
          sample_ref == "all observations"
          & parties_denom == "classif"
          & comparison == alt_comparisons[c]
          & ref_rd_party == alt_parties[p]
          & outcome_name == alt_outcomes[o])
      
      ## for every c-p-o combination, we need the optimal bandwidth, as well as half and double the optimal one, and the ratio between h and b:
      rd_opt_main <- with (data_tmp, rdrobust (
        y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=2))
      rd_opt_bwds <- c (bwds, rd_opt_main$bws[1,1], rd_opt_main$bws[1,1]*1/2, rd_opt_main$bws[1,1]*2)
      rho <- rd_opt_main$bws[1,1] / rd_opt_main$bws[2,1]
      
      ## looping over all bandwidth choices
      for (b in seq_along (rd_opt_bwds)){
        rd_tmp <- with (data_tmp, rdrobust (
          y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=2
          , h=rd_opt_bwds[b], b=rd_opt_bwds[b]/rho)) ## for the bias-corrected estimates
        
        ## storing everything
        alt_bwds <- rbind (
          alt_bwds, c (
            alt_comparisons[c] %>% as.character ()
            , alt_parties[p] %>% as.character ()
            , alt_outcomes[o] %>% as.character ()
            , b, rd_opt_bwds[b], rd_tmp$Estimate[1], rd_tmp$ci[3,], rd_tmp$N_h))
        }}}}
## naming the columns, in the order in which the loop stores the elements of each row
colnames (alt_bwds) <- c (
  "comparison", "ref_party", "outcome", "b", "bwd"
  , "est", "ci_low", "ci_high", "n_left", "n_right")

## tidy version of the estimates: numbers back to numeric, identifiers as factors, labels for
## the facets, and a flag separating the manual bandwidths from those based on the optimal one
alt_bwds2 <- as.data.frame (alt_bwds) %>% 
  mutate (across (c (b, bwd, est, ci_low, ci_high, n_left, n_right), as.numeric)) %>% 
  mutate (
    sample = factor ("all observations")
    , comparison = factor (comparison)
    , ref_party = factor (ref_party, levels=c (
      "all parties", "incumbent party", "PJ", "UCR"))
    , ref_party2 = factor (
      case_when (
        comparison == "1 vs 2" & ref_party == "all parties" ~ "first vs second"
        , comparison == "2 vs 3" & ref_party == "all parties" ~ "second vs third"
        , ref_party == "incumbent party" ~ "reference party:\nincumbent"
        , ref_party == "PJ" ~ "reference party:\nPJ"
        , ref_party == "UCR" ~ "reference party:\nUCR")
      , levels=c (
        "first vs second", "second vs third", "reference party:\nincumbent", "reference party:\nPJ", "reference party:\nUCR"))
    , outcome = factor (outcome, levels=outcomes[!grepl("_lag", outcomes)])
    , out_var = factor (
      as.character (outcome)
      , levels = c (
        "winner_general", "votos_partido_pct_general", "votos_partido_pct_delta", "votos_partido_log_delta")
      , labels = c (
        "outcome:\nvictory (0/100)", "outcome:\nvote % (0:100)", "outcome:\n\u0394 vote % (-100:100)", "outcome:\nlog (votes general / votes primary)"))
    , bwd_type = factor (ifelse ( ## bandwidths taken from `bwds` vs. those derived from the optimal one
      bwd %in% bwds, "manual: 5 to 35", "1/2x; 1x; or 2x CCT-optimal bandwidth")))
summary (alt_bwds2)


## (4.3.3.2) Drawing the plots

# all observations
# RD estimate and 95% CI by bandwidth for the two main outcomes ("all parties" only): one row
# of panels per outcome, one column per comparison; colours tell the type of bandwidth apart
(bwd_current_all <- alt_bwds2 %>% 
   filter (
     outcome %in% c ("winner_general", "votos_partido_pct_general")
     , ref_party == "all parties") %>% 
   ggplot (aes (x=bwd, y=est, color=bwd_type)) +
   geom_hline (yintercept=0, col=col_cutoff) +
   geom_linerange (aes (ymin=ci_low, ymax=ci_high), size=size_bin/2) +
   geom_point (size=size_bin, alpha=alpha_bin) +
   facet_grid (out_var ~ comparison, scales="free_y") +
   scale_x_continuous ("bandwidth", limits=c (4.5, 35.5), breaks=seq (5, 35, by=5)) +
   scale_y_continuous ("RD point estimate and 95% CI") +
   scale_color_manual (values=c (col_gen, col_paso)) +
   theme (legend.position="bottom", legend.title=element_blank (), legend.box.margin=margin (-15,-9,-9,-9)) +
   theme (axis.title.x = element_text (margin=margin (t=10, r=0, b=0, l=0))))



### (4.3.4) Robustness: At which LEVEL are votes counted? Municipal vs Provincial vs National ----

## (4.3.4.1) Preparing the data
## Reshapes `byparty_mesa` into the long format used by the RD loops: one row per
## party x outcome x comparison, with the running variable of each comparison in `running`.
byparty_mesa_rd <- byparty_mesa %>% 
  pivot_wider ( ## one column per variable and value of `type`
    id_cols = c (level:elYear, muni_id:agrupacion_nombre)
    , names_from = type
    , values_from = c (turnout_pct:rank_3)) %>% 
  filter (
    !(elYear==2011 & level=="local") ## no data for this
    , rank_1_paso == 1 | rank_2_paso == 1 | rank_3_paso %in% 1 ## rows flagged 1 in any of the three rank_*_paso columns
    , !is.na (votes_1_pct_general)) %>% 
  mutate (
    running12 = case_when ( ## first vs second: positive for the first-placed party
      rank_1_paso == 1 ~ votes_1_pct_paso - votes_2_pct_paso
      , rank_2_paso == 1 ~ votes_2_pct_paso - votes_1_pct_paso)
    , running23 = case_when ( ## second vs third: positive for the second-placed party
      rank_2_paso == 1 ~ votes_2_pct_paso - votes_3_pct_paso
      , rank_3_paso == 1 ~ votes_3_pct_paso - votes_2_pct_paso)
    , rank_1_general = rank_1_general * 100) %>% ## rescaled by 100 to match the (0/100) outcome label
  rename (
    year = elYear
    , winner_general = rank_1_general
    , votos_partido_pct_general = votes_pct_general) %>% 
  pivot_longer ( ## stacking the outcomes...
    cols = c (winner_general, votos_partido_pct_general)
    , names_to = "outcome_name"
    , values_to = "outcome_value") %>% 
  pivot_longer ( ## ... and the comparisons
    cols = c (running12, running23)
    , names_to = "comparison"
    , values_to = "running") %>% 
  mutate (
    outcome_name = factor (
      outcome_name, levels=c ("winner_general", "votos_partido_pct_general"))
    , comparison = factor (
      comparison
      , levels = c ("running12", "running23")
      , labels = c ("1 vs 2", "2 vs 3")))
summary (byparty_mesa_rd)


## (4.3.4.2) Estimating the models
## For every level, comparison, outcome, bandwidth selector and polynomial order, the RD model
## (no controls) is estimated and one row is appended to `rd_results_level`: identifiers, the
## values returned by extract_rd (), the SD of the outcome among control observations within the
## bandwidth, and the power against three effect sizes (1 SD, 1/2 SD and the point estimate).

# objects to store the values of interest
rd_results_level <- NULL ## we need this to "store" the results

# values to loop over
mesa_levels <- unique (byparty_mesa_rd$level)
mesa_comparisons <- unique (byparty_mesa_rd$comparison)
mesa_outcomes <- unique (byparty_mesa_rd$outcome_name)

# loop to get all the results
for (s in seq_along (mesa_levels)){ ## (sub)sample: value of `level`
  
  for (c in seq_along (mesa_comparisons)){ ## comparison group: 1v2 or 2v3
    
    for (o in seq_along (mesa_outcomes)){ ## outcome
      
      ## subsample of interest (the same for every bandwidth selector and polynomial degree)
      data_tmp <- byparty_mesa_rd %>% 
        filter (
          level == mesa_levels[s]
          , comparison == mesa_comparisons[c]
          , outcome_name == mesa_outcomes[o])
      
      ## fewer than 12 observations on either side of the cutoff: do nothing
      enough_obs <- (sum (data_tmp$running < 0, na.rm=TRUE) >= 12
                     & sum (data_tmp$running > 0, na.rm=TRUE) >= 12)
      if (!enough_obs) next
      
      ## data for the power calculations
      pow_data <- as.data.frame (data_tmp) %>% select (outcome_value, running)
      
      for (b in c ("mserd", "cerrd")) { ## selecting the bandwidth selection procedure
        
        for (l in 1:2) {  ## selecting the polynomial degree
          
          # no controls
          rd_main <- with (data_tmp, rdrobust (
            y=outcome_value, x=running, bwselect=b, covs=NULL, cluster=year, p=l, q=l+1))
          
          # SD of outcome in the control group within (conventional) RD bandwidth
          control_tmp <- filter (data_tmp, running<0 & abs(running)<=rd_main$bws[1,1])
          sd_main_c <- sd (control_tmp$outcome_value, na.rm=TRUE)
          
          # power against (1) one SD, (2) half a SD, and (3) the point estimate
          pow_main01 <- rdpower (
            data=pow_data
            , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
            , tau=sd_main_c, cluster=data_tmp$year)
          pow_main02 <- rdpower (
            data=pow_data
            , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
            , tau=sd_main_c/2, cluster=data_tmp$year)
          pow_main03 <- rdpower (
            data=pow_data
            , cutoff=0, alpha=0.05, level=95, p=l, q=l+1, covs=NULL, plot=FALSE
            , tau=rd_main$Estimate[1], cluster=data_tmp$year)
          
          ## storing everything
          rd_results_level <- rbind (
            rd_results_level
            , c ( ## main results
              as.character (mesa_levels[s])
              , as.character (mesa_comparisons[c])
              , "none"
              , as.character (mesa_outcomes[o])
              , b, l, extract_rd (rd_main), sd_main_c
              , pow_main01$power.rbc, pow_main02$power.rbc, pow_main03$power.rbc))
        }
      }
    }
  }
}
## naming the columns, in the order in which the loop stores the elements of each row
colnames (rd_results_level) <- c (
  "level", "comparison", "covs", "outcome", "bwdselect", "poly"
  , "est", "ci", "n", "bwd", "pval", "sd_c", "pow_sd", "pow_sd2", "pow_beta")

## tidy version of the results: identifiers as factors, LaTeX-ready labels, and numbers
## formatted as text with a fixed number of decimals
rd_results_level2 <- as.data.frame (rd_results_level) %>% 
  mutate ( ## identifiers and labels
    level = factor (
      dplyr::recode (level, local = "municipal")
      , levels=c ("municipal", "provincial", "national"))
    , comparison = factor (comparison)
    , covs = factor (covs, levels=c ("none", "all controls"))
    , bwdselect = factor (bwdselect, levels=c ("mserd", "cerrd"))
    , outcome_full = case_when ( ## built before `outcome` becomes a factor
      outcome == "winner_general" ~ "{\\it winner}$^{\\text{G}}$ (0/100)"
      , outcome == "votos_partido_pct_general" ~ "\\% {\\it vote}$^{\\text{G}}$ (0:100)"
      , TRUE ~ as.character (outcome))
    , outcome = factor (outcome, levels=outcomes)) %>% 
  mutate ( ## numbers
    n = str_replace_all (n, "\\|", "$|$")
    , bwd = sprintf ("%.1f", as.numeric (bwd))
    , pval = sprintf ("%.2f", as.numeric (pval))
    , poly = as.numeric (poly)
    , across (
      c (est, sd_c, pow_sd, pow_sd2, pow_beta)
      , ~ sprintf ("%.2f", round (as.numeric (.x), 2)))) %>% 
  separate_wider_delim ( ## two decimals for all ci's
    cols=ci, delim=":", names=c ("ci_left", "ci_right")) %>% 
  mutate (
    ci_left = str_remove_all (ci_left, "\\[")
    , ci_right = str_remove_all (ci_right, "\\]")
    , ci = str_c (
      "[", sprintf ("%.2f", as.numeric (ci_left))
      , " : ", sprintf ("%.2f", as.numeric (ci_right)), "]")) %>% 
  arrange (bwdselect, poly, level, outcome, comparison)
summary (rd_results_level2)


## (4.3.4.3) Using only 2013-2023 data (the years selected by the filter below), the municipal-level results are very similar. The difference with the main results is a sampling issue:

# full-sample, "all parties" observations of the main data for 2013-2023, for one comparison and outcome
local_2013_2023 <- function (comparison_id, outcome_id) {
  byparty_rd %>% 
    filter (
      year %in% 2013:2023
      & comparison == comparison_id
      & parties_denom == "classif"
      & ref_rd_party == "all parties"
      & sample_ref == "all observations"
      & outcome_name == outcome_id) }

# victory: 1 vs 2, then 2 vs 3
summary (rd_local_win12 <- with (
  local_2013_2023 ("1 vs 2", "winner_general")
  , rdrobust (
    y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=1+1)))
summary (rd_local_win23 <- with (
  local_2013_2023 ("2 vs 3", "winner_general")
  , rdrobust (
    y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=1+1)))

# vote percentage: 1 vs 2, then 2 vs 3
summary (rd_local_pct12 <- with (
  local_2013_2023 ("1 vs 2", "votos_partido_pct_general")
  , rdrobust (
    y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=1+1)))
summary (rd_local_pct23 <- with (
  local_2013_2023 ("2 vs 3", "votos_partido_pct_general")
  , rdrobust (
    y=outcome_value, x=running, bwselect="mserd", covs=NULL, cluster=year, p=1, q=1+1)))





##########################
##### (4.4) RD tables ----
##########################

### (4.4.0) Common headers ----

# builds the title row of panels (b)-(g): extra vertical space, a title spanning the 12 columns, and a \midrule
rd_panel_header <- function (panel_title) {
  str_c ("[1.5ex] \\multicolumn{12}{l}{\\textbf{", panel_title, "}} \\\\ \\midrule \n")
}

# top rule + the "power against" spanner over columns 10-12
Header1 <- str_c (
  "\\toprule & & & & & & & & & "
  , "\\multicolumn{3}{c}{power against}\\\\ \\cmidrule{10-12} \n")

# column titles; the first cell doubles as the title of panel (a), the 8th cell is an empty spacer
Header2 <- str_c (
  "\\multicolumn{1}{l}{\\textbf{(a) Overall effect}} & "
  , "\\multicolumn{1}{c}{outcome} & "
  , "\\multicolumn{1}{c}{estim.} & "
  , "\\multicolumn{1}{c}{95\\% \\textsc{ci}} & "
  , "\\multicolumn{1}{c}{$p$-val.} & "
  , "\\multicolumn{1}{c}{bwd.} & "
  , "\\multicolumn{1}{c}{$N^{-} | N^{+}$} & & "
  , "\\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & "
  , "\\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & "
  , "\\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & "
  , "\\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n")

# titles of the remaining panels
Header3 <- rd_panel_header ("(b) Concurrent elections")
Header4 <- rd_panel_header ("(c) Midterm elections")
Header5 <- rd_panel_header ("(d) {\\it Conurbano}")
Header6 <- rd_panel_header ("(e) Interior")
Header7 <- rd_panel_header ("(f) Small municipalities")
Header8 <- rd_panel_header ("(g) Large municipalities")



### (4.4.1) Main results: Win and % votes ----

# rows of the table: both outcomes, all parties, classified parties in the denominator,
# no covariates, MSE-optimal bandwidth, local-linear fit
tab_main <- rd_results2 %>% 
  arrange (sample, outcome, ref_party2) %>% 
  filter (outcome %in% c ("winner_general", "votos_partido_pct_general")) %>% 
  filter (ref_party == "all parties", parties_denom == "classif") %>% 
  filter (covs == "none", bwdselect == "mserd", poly == 1) %>% 
  select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta)
tab_main

# table note
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.25cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}, using a triangular kernel and clustering the standard errors by election year.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  Only parties that classified to the general election are included in the denominator.
  The last three columns report how much statistical power the model has to detect an effect that is as large as (a) a standard deviation of the outcome variable in the control group (\\textsc{sd}$_{C}$); (b) half as much; or (c) equal in absolute value to the one we actually estimated ($|\\hat{\\tau}_{\\textsc{rd}}|$).
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# LaTeX export: the two column headers go before the first row, one panel title
# after every fourth row, and the note after row 28
addtorow <- list (
  pos = list (0, 0, 4, 8, 12, 16, 20, 24, 28)
  , command = c (Header1, Header2, Header3, Header4, Header5, Header6, Header7, Header8, Bottom1))
print (
  xtable (
    bind_cols (tab_main[,1:7], "", tab_main[,8:11]) ## empty spacer column before the power block
    , caption="\\textsc{rd} estimates: Effect of primary ranking on general election outcomes"
    , label="T:RDMainFull"
    , align=c (rep ("l", 3), rep ("c", 3), "r", rep ("c", 6))
    , digits=2)
  , add.to.row=addtorow
  , hline.after=NULL ## all rules come from add.to.row
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity ## keep the LaTeX markup untouched
  , size="footnotesize"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center")


### (4.4.2) Robustness (I): Including all parties in the denominator ----

# same selection as the main table, but with parties_denom == "all"
tab_denom <- rd_results2 %>% 
  arrange (sample, outcome, ref_party2) %>% 
  filter (outcome %in% c ("winner_general", "votos_partido_pct_general")) %>% 
  filter (ref_party == "all parties", parties_denom == "all") %>% 
  filter (covs == "none", bwdselect == "mserd", poly == 1) %>% 
  select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta)
tab_denom

# table note
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.25cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\citeA{calonico_etal2014}, using a triangular kernel and clustering the standard errors by election year.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  All parties that participated in the primary are included in the denominator.
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# LaTeX export: the two column headers go before the first row, one panel title
# after every fourth row, and the note after row 28
addtorow <- list (
  pos = list (0, 0, 4, 8, 12, 16, 20, 24, 28)
  , command = c (Header1, Header2, Header3, Header4, Header5, Header6, Header7, Header8, Bottom1))
print (
  xtable (
    bind_cols (tab_denom[,1:7], "", tab_denom[,8:11]) ## empty spacer column before the power block
    , caption="\\textsc{rd} estimates: Including all parties in the denominator"
    , label="T:RDRobQuali"
    , align=c (rep ("l", 3), rep ("c", 3), "r", rep ("c", 6))
    , digits=2)
  , add.to.row=addtorow
  , hline.after=NULL ## all rules come from add.to.row
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity ## keep the LaTeX markup untouched
  , size="footnotesize"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center")



### (4.4.3) Robustness (II): Including controls ----

# same selection as the main table, but with covs == "all controls"
tab_controls <- rd_results2 %>% 
  arrange (sample, outcome, ref_party2) %>% 
  filter (outcome %in% c ("winner_general", "votos_partido_pct_general")) %>% 
  filter (ref_party == "all parties", parties_denom == "classif") %>% 
  filter (covs == "all controls", bwdselect == "mserd", poly == 1) %>% 
  select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta)
tab_controls

# table note (lists the controls)
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.25cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\cite{calonico_etal2014}, using a triangular kernel and clustering the standard errors by election year.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  Only parties that classified to the general election are included in the denominator.
  Specifications include controls for: council size; \\% turnout in the primary; a midterm dummy; a set of dummies indicating the president's party (which was perfectly collinear with the governor's during 2011-2023), the incumbent party at the local level, the \\textsc{pj} and the \\textsc{ucr}; a dummy indicating the party held a competitive primary; the number of factions participating in the primary; the \\% of (intra-party) votes obtained by the largest faction; and the within-party Golosov index in the primary.
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# LaTeX export: the two column headers go before the first row, one panel title
# after every fourth row, and the note after row 28
addtorow <- list (
  pos = list (0, 0, 4, 8, 12, 16, 20, 24, 28)
  , command = c (Header1, Header2, Header3, Header4, Header5, Header6, Header7, Header8, Bottom1))
print (
  xtable (
    bind_cols (tab_controls[,1:7], "", tab_controls[,8:11]) ## empty spacer column before the power block
    , caption="\\textsc{rd} estimates: Including controls"
    , label="T:RDRobControls"
    , align=c (rep ("l", 3), rep ("c", 3), "r", rep ("c", 6))
    , digits=2)
  , add.to.row=addtorow
  , hline.after=NULL ## all rules come from add.to.row
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity ## keep the LaTeX markup untouched
  , size="footnotesize"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center")



### (4.4.4) Robustness (III): CER-optimal bandwidth ----

# same selection as the main table, but with bwdselect == "cerrd"
tab_cer <- rd_results2 %>% 
  arrange (sample, outcome, ref_party2) %>% 
  filter (outcome %in% c ("winner_general", "votos_partido_pct_general")) %>% 
  filter (ref_party == "all parties", parties_denom == "classif") %>% 
  filter (covs == "none", bwdselect == "cerrd", poly == 1) %>% 
  select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta)
tab_cer

# table note
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.25cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{cer}-optimal bandwidth proposed by \\citeA{calonico_etal2014}, using a triangular kernel and clustering the standard errors by election year.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  Only parties that classified to the general election are included in the denominator.
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# LaTeX export: the two column headers go before the first row, one panel title
# after every fourth row, and the note after row 28
addtorow <- list (
  pos = list (0, 0, 4, 8, 12, 16, 20, 24, 28)
  , command = c (Header1, Header2, Header3, Header4, Header5, Header6, Header7, Header8, Bottom1))
print (
  xtable (
    bind_cols (tab_cer[,1:7], "", tab_cer[,8:11]) ## empty spacer column before the power block
    , caption="\\textsc{rd} estimates: \\textsc{cer}-optimal bandwidths"
    , label="T:RDRobCER"
    , align=c (rep ("l", 3), rep ("c", 3), "r", rep ("c", 6))
    , digits=2)
  , add.to.row=addtorow
  , hline.after=NULL ## all rules come from add.to.row
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity ## keep the LaTeX markup untouched
  , size="footnotesize"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center")



### (4.4.5) Robustness (IV): 2nd-order polynomials ----

# same selection as the main table, but with poly == 2
tab_poly2 <- rd_results2 %>% 
  arrange (sample, outcome, ref_party2) %>% 
  filter (outcome %in% c ("winner_general", "votos_partido_pct_general")) %>% 
  filter (ref_party == "all parties", parties_denom == "classif") %>% 
  filter (covs == "none", bwdselect == "mserd", poly == 2) %>% 
  select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta)
tab_poly2

# table note
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.25cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\citeA{calonico_etal2014}, using a second-order polynomial with a triangular kernel and clustering the standard errors by election year.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  Only parties that classified to the general election are included in the denominator.
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# LaTeX export: the two column headers go before the first row, one panel title
# after every fourth row, and the note after row 28
addtorow <- list (
  pos = list (0, 0, 4, 8, 12, 16, 20, 24, 28)
  , command = c (Header1, Header2, Header3, Header4, Header5, Header6, Header7, Header8, Bottom1))
print (
  xtable (
    bind_cols (tab_poly2[,1:7], "", tab_poly2[,8:11]) ## empty spacer column before the power block
    , caption="\\textsc{rd} estimates: Second-order polynomials"
    , label="T:RDRobPoly"
    , align=c (rep ("l", 3), rep ("c", 3), "r", rep ("c", 6))
    , digits=2)
  , add.to.row=addtorow
  , hline.after=NULL ## all rules come from add.to.row
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity ## keep the LaTeX markup untouched
  , size="footnotesize"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center")



### (4.4.6) Additional results (I): Results by reference party (Incumbent, PJ, UCR) ----

# full sample only, every reference party except "all parties"; rows sorted by reference party
tab_refs <- rd_results2 %>% 
  arrange (ref_party, sample, outcome) %>% 
  filter (outcome %in% c ("winner_general", "votos_partido_pct_general")) %>% 
  filter (sample == "all observations") %>% 
  filter (ref_party != "all parties", parties_denom == "classif") %>% 
  filter (covs == "none", bwdselect == "mserd", poly == 1) %>% 
  select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta)
tab_refs

# panel titles specific to this table
Header2b <- str_c ("\\multicolumn{1}{l}{\\textbf{(a) Incumbent}} & \\multicolumn{1}{c}{outcome} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n")
Header3b <- str_c ("[1.5ex] \\multicolumn{12}{l}{\\textbf{(b) Reference party: \\textsc{pj}}} \\\\ \\midrule \n")
Header4b <- str_c ("[1.5ex] \\multicolumn{12}{l}{\\textbf{(c) Reference party: \\textsc{ucr}}} \\\\ \\midrule \n")

# table note
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{15.05cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\citeA{calonico_etal2014}, using a triangular kernel and clustering the standard errors by municipality.
  Only pre-determined reference parties (the incumbent party at the moment of the primary, the \\textsc{pj} or the \\textsc{ucr}, respectively) are included in the sample.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  Only parties that classified to the general election are included in the denominator.
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# LaTeX export: the two column headers go before the first row, panel titles
# after rows 4 and 8, and the note after row 12
addtorow <- list (
  pos = list (0, 0, 4, 8, 12)
  , command = c (Header1, Header2b, Header3b, Header4b, Bottom1))
print (
  xtable (
    bind_cols (tab_refs[,1:7], "", tab_refs[,8:11]) ## empty spacer column before the power block
    , caption="\\textsc{rd} estimates: For different reference parties"
    , label="T:RDRobRef"
    , align=c (rep ("l", 3), rep ("c", 3), "r", rep ("c", 6))
    , digits=2)
  , add.to.row=addtorow
  , hline.after=NULL ## all rules come from add.to.row
  , include.rownames=FALSE
  , include.colnames=FALSE
  , sanitize.text.function=identity ## keep the LaTeX markup untouched
  , size="footnotesize"
  , floating=TRUE
  , table.placement="t"
  , caption.placement="top"
  , latex.environments="center")



### (4.4.7) Additional results (II): Comparing municipal, provincial and national effects ----

# getting the estimates: no covariates, MSE-optimal bandwidth, local-linear fit,
# sorted by level of government, outcome and comparison
# (the filter() call used to start with a stray comma, i.e. an empty first condition; it has been removed)
(tab_levels <- rd_results_level2 %>% 
   arrange (level, outcome, comparison) %>%
   filter (
     covs == "none", bwdselect == "mserd", poly == 1) %>% 
   select (comparison, outcome_full, est, ci, pval, bwd, n, sd_c:pow_beta))

# panel titles specific to this table
Header2b <- str_c ("\\multicolumn{1}{l}{\\textbf{(a) Municipal results (2013-23)}} & \\multicolumn{1}{c}{outcome} & \\multicolumn{1}{c}{estim.} & \\multicolumn{1}{c}{95\\% \\textsc{ci}} & \\multicolumn{1}{c}{$p$-val.} & \\multicolumn{1}{c}{bwd.} & \\multicolumn{1}{c}{$N^{-} | N^{+}$} & & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{\\textsc{sd}$_{C}$} & \\multicolumn{1}{c}{$\\frac{\\textsc{sd}_{C}}{2}$} & \\multicolumn{1}{c}{$|\\hat{\\tau}_{\\textsc{rd}}|$} \\\\ \\midrule \n")
Header3b <- str_c ("[1.5ex] \\multicolumn{12}{l}{\\textbf{(b) Provincial results (aggregated at the municipal level) (2011-23)}} \\\\ \\midrule \n")
Header4b <- str_c ("[1.5ex] \\multicolumn{12}{l}{\\textbf{(c) National results (aggregated at the municipal level) (2011-23)}} \\\\ \\midrule \n")

# table note (grammar of the third sentence fixed: "Each panel reports")
Bottom1 <- str_c ("\\bottomrule \\multicolumn{12}{l}{
  \\begin{minipage}{16.35cm}~\\\\
  \\footnotesize Sharp (conventional) \\textsc{rd} estimates, with robust \\textsc{ci}s and $p$-values based on the \\textsc{mse}-optimal bandwidth proposed by \\citeA{calonico_etal2014}, using a triangular kernel and clustering the standard errors by election year.
  The running variable is the primary election margin between the first- and second-placed parties (odd-numbered rows) or the second- and third-placed ones (even-numbered rows).
  Each panel reports results for a different type of election: (a) municipal (2013-23 only); (b) provincial (with values aggregated by municipality, 2011-23); and (c) national (with values aggregated by municipality, 2011-23).
  Only parties that surpassed the threshold of 1.5\\% of positive votes in the municipality are included in the denominator.
  Reported number of observations indicate {\\it effective} sample sizes. 
  \\end{minipage}}\\\\")

# exporting as a LaTeX table: the two column headers go before the first row,
# panel titles after rows 4 and 8, and the note after row 12
addtorow <- list ()
addtorow$pos <- list ()
addtorow$pos[[1]] <- 0
addtorow$pos[[2]] <- 0
addtorow$pos[[3]] <- 4
addtorow$pos[[4]] <- 8
addtorow$pos[[5]] <- 12
addtorow$command <- c (Header1, Header2b, Header3b, Header4b, Bottom1)
print (xtable ( bind_cols (tab_levels[,1:7], "", tab_levels[,8:11]) ## empty spacer column before the power block
                , align=c ("l","l","l","c","c","c","r","c","c","c","c","c","c")
                , digits=2
                , caption="\\textsc{rd} estimates: Comparing municipal, provincial and national results"
                , label="T:RDRobLevels")
       , sanitize.text.function=function(x){x} ## keep the LaTeX markup untouched
       , floating=TRUE
       , table.placement="t"
       , caption.placement="top" 
       , latex.environments="center"
       , size="footnotesize"
       , include.colnames=FALSE
       , include.rownames=FALSE
       , hline.after = c () ## all rules come from add.to.row
       , add.to.row=addtorow )




################################################
################################################
########## (5) Exporting all figures ###########
################################################
################################################

### (5.1) Changing the working directory ----
# create the export folder when it is missing: otherwise setwd() stops the script
# right before the figures are written
if (!dir.exists ("figures")) dir.create ("figures", recursive=TRUE)
setwd ("figures/") ## select the directory where you want to export your figures


### (5.2) Exporting the figures, by size ----

## (5.2.1) small, horizontal

# reference width and height (in cm) for this group of figures
pwid <- 15*1.6
phei <- 6*1.2

# proportion of dropouts
ggsave (
  filename="figDescDropouts.png", plot=p_dropouts
  , width=pwid, height=phei, units="cm", dpi=600)

# council sizes in 2011 (twice the reference height)
ggsave (
  filename="figDescCouncilN.png", plot=p_counciln
  , width=pwid, height=phei*2, units="cm", dpi=600)


## (5.2.2) individual scatterplots

# every scatterplot in this group is exported with the same size (in cm)
scatter_wid <- pwid*.75
scatter_hei <- phei*2.5*.75

## concurrent vs. midterm

# scatterplots in levels: turnout, positive votes, votes 1+2, Golosov
ggsave (filename="figDVMidtTurnout.png", plot=p_midt_turnout_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVMidtPositive.png", plot=p_midt_positive_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVMidtFirst2.png", plot=p_midt_sum2_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVMidtGolosov.png", plot=p_midt_golosov_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)

# scatterplots in logs: turnout, positive votes, votes 1+2, Golosov
ggsave (filename="figDVMidtTurnout_log.png", plot=p_midt_turnout_log
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVMidtPositive_log.png", plot=p_midt_positive_log
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVMidtFirst2_log.png", plot=p_midt_sum2_log
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVMidtGolosov_log.png", plot=p_midt_golosov_log
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)


## small vs. large municipalities

# scatterplots in levels: turnout, positive votes, votes 1+2, Golosov
ggsave (filename="figDVSizeTurnout.png", plot=p_size_turnout_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVSizePositive.png", plot=p_size_positive_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVSizeFirst2.png", plot=p_size_sum2_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)
ggsave (filename="figDVSizeGolosov.png", plot=p_size_golosov_delta
        , width=scatter_wid, height=scatter_hei, units="cm", dpi=600)


## RD estimates conditional on first placed party's margin in primary

# the reference dimensions are set again (to the same values as in 5.2.1)
pwid <- 15*1.6
phei <- 6*1.2

ggsave (filename="figRDCondMarginWin.png", plot=cond_margin12_win
        , width=pwid*.75, height=phei*2.5*.75, units="cm", dpi=600)
ggsave (filename="figRDCondMarginSh.png", plot=cond_margin12_sh
        , width=pwid*.75, height=phei*2.5*.75, units="cm", dpi=600)


## (5.2.3) multiple plots (3x2)

# reference width and height (in cm) for this group of figures
pwid <- 15*1.5
phei <- 15*1.3

# outcomes over time
ggsave (
  filename="figDescOutcomeTS.png", plot=p_outcomes_series
  , width=pwid, height=phei*1.0, units="cm", dpi=600)

# density plots: margins and outcomes, by election type
ggsave (
  filename="figDescDensity.png", plot=p_density_levels
  , width=pwid*1.4, height=phei, units="cm", dpi=600)

# Google Trends data: trends and queries
ggsave (
  filename="figDescTrends.png", plot=p_google_trends
  , width=pwid*1.2, height=phei*1.1, units="cm", dpi=600)
ggsave (
  filename="figDescQueries.png", plot=p_google_queries
  , width=pwid*1.2, height=phei*1.4, units="cm", dpi=600)


## (5.2.4) multiple plots (3x2)

# reference width and height (in cm) for the correlation figures
pwid <- 20*1.05
phei <- 17*1.25

# correlation between variables at different levels
ggsave (
  filename="figDescVariablesCorrel.png", plot=p_correl_levels
  , width=pwid*1.5, height=phei*1.8, units="cm", dpi=600)

# correlation between rankings
ggsave (
  filename="figDescRankingCorrelAll.png", plot=p_rank_correl
  , width=pwid, height=phei*1.25, units="cm", dpi=600)


## (5.2.5) RD plots (2x4 or 4x4)

# reference width and height (in cm) for the RD figures
pwid <- 16*1.45
phei <- 11*1.45

# full sample
ggsave (
  filename="figRDFull.png", plot=rd_all_full
  , width=pwid, height=phei, units="cm", dpi=600)

# heterogeneous effects (wider than the full-sample figure)
ggsave (
  filename="figRDMidt.png", plot=rd_all_midt
  , width=pwid*1.6, height=phei, units="cm", dpi=600)
ggsave (
  filename="figRDSize.png", plot=rd_all_size
  , width=pwid*1.6, height=phei, units="cm", dpi=600)


# balance checks
ggsave (
  filename="figRDBalance.png", plot=p_bal_all
  , width=pwid*1.6*1.4, height=phei*1.2, units="cm", dpi=600)

# alternative bandwidths
ggsave (
  filename="figRDBandwidthsAll.png", plot=bwd_current_all
  , width=pwid, height=phei, units="cm", dpi=600)



### (5.3) Exporting the workspace --> if you want to access the results later, uncomment and run this code ----

# go back to the replication folder stored in `home` (section 5.1 moved the working directory to "figures/")
setwd (home)
# optional: uncomment the next line to save every object in the workspace to a single .RData file
# save.image ("Replication Results Three_Is_a_Crowd 2025-11-07.RData")
